Merge "Revert "ART: Try to statically evaluate some conditions.""
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index f5a95fa..0896252 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -49,6 +49,11 @@
 # Enable the read barrier by default.
 ART_USE_READ_BARRIER ?= true
 
+# Default compact dex level to none.
+ifeq ($(ART_DEFAULT_COMPACT_DEX_LEVEL),)
+ART_DEFAULT_COMPACT_DEX_LEVEL := none
+endif
+
 ART_CPP_EXTENSION := .cc
 
 ifndef LIBART_IMG_HOST_BASE_ADDRESS
diff --git a/build/art.go b/build/art.go
index 5704b43..3f598da 100644
--- a/build/art.go
+++ b/build/art.go
@@ -66,6 +66,9 @@
 			"-DART_READ_BARRIER_TYPE_IS_"+barrierType+"=1")
 	}
 
+	cdexLevel := envDefault(ctx, "ART_DEFAULT_COMPACT_DEX_LEVEL", "none")
+	cflags = append(cflags, "-DART_DEFAULT_COMPACT_DEX_LEVEL="+cdexLevel)
+
 	// We need larger stack overflow guards for ASAN, as the compiled code will have
 	// larger frame sizes. For simplicity, just use global not-target-specific cflags.
 	// Note: We increase this for both debug and non-debug, as the overflow gap will
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 1536339..c438c54 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -244,7 +244,7 @@
   {
     const char* log_args = "-verbose:"
         "class,compiler,gc,heap,jdwp,jni,monitor,profiler,signals,simulator,startup,"
-        "third-party-jni,threads,verifier";
+        "third-party-jni,threads,verifier,verifier-debug";
 
     LogVerbosity log_verbosity = LogVerbosity();
     log_verbosity.class_linker = true;
@@ -261,6 +261,7 @@
     log_verbosity.third_party_jni = true;
     log_verbosity.threads = true;
     log_verbosity.verifier = true;
+    log_verbosity.verifier_debug = true;
 
     EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose);
   }
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 37bdcdc..f12ef97 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -669,6 +669,8 @@
         log_verbosity.threads = true;
       } else if (verbose_options[j] == "verifier") {
         log_verbosity.verifier = true;
+      } else if (verbose_options[j] == "verifier-debug") {
+        log_verbosity.verifier_debug = true;
       } else if (verbose_options[j] == "image") {
         log_verbosity.image = true;
       } else if (verbose_options[j] == "systrace-locks") {
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 3699d66..fc19b54 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -181,15 +181,10 @@
             ],
         },
     },
-    target: {
-        android: {
-            // For atrace.
-            shared_libs: ["libcutils"],
-        },
-    },
     generated_sources: ["art_compiler_operator_srcs"],
     shared_libs: [
         "libbase",
+        "libcutils",  // for atrace.
         "liblzma",
     ],
     include_dirs: ["art/disassembler"],
@@ -211,6 +206,7 @@
         "driver/compiler_options.h",
         "linker/linker_patch.h",
         "optimizing/locations.h",
+        "optimizing/optimizing_compiler_stats.h",
 
         "utils/arm/constants_arm.h",
         "utils/mips/assembler_mips.h",
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 500fc4a..40a5370 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -174,7 +174,6 @@
       }
     }
 
-    timer_.reset(new CumulativeLogger("Compilation times"));
     CreateCompilerDriver(compiler_kind_, instruction_set);
   }
 }
@@ -193,9 +192,6 @@
                                             GetCompiledClasses(),
                                             GetCompiledMethods(),
                                             number_of_threads,
-                                            /* dump_stats */ true,
-                                            /* dump_passes */ true,
-                                            timer_.get(),
                                             /* swap_fd */ -1,
                                             GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
@@ -227,7 +223,6 @@
 }
 
 void CommonCompilerTest::TearDown() {
-  timer_.reset();
   compiler_driver_.reset();
   callbacks_.reset();
   verification_results_.reset();
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index bcda41a..05fdc97 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -106,7 +106,6 @@
   std::unique_ptr<CompilerOptions> compiler_options_;
   std::unique_ptr<VerificationResults> verification_results_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
-  std::unique_ptr<CumulativeLogger> timer_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
 
 
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index c500921..bb614ae 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -16,7 +16,9 @@
 
 #include "compiler.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "driver/compiler_driver.h"
 #include "optimizing/optimizing_compiler.h"
 #include "utils.h"
diff --git a/compiler/debug/dwarf/writer.h b/compiler/debug/dwarf/writer.h
index 95912ad..afeb980 100644
--- a/compiler/debug/dwarf/writer.h
+++ b/compiler/debug/dwarf/writer.h
@@ -19,8 +19,10 @@
 
 #include <type_traits>
 #include <vector>
+
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "leb128.h"
 
 namespace art {
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index d0c98a7..27b70c8 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -207,13 +207,12 @@
   }
 
   // Write .eh_frame/.debug_frame section.
-  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
-                       ? builder->GetDebugFrame()
-                       : builder->GetEhFrame());
+  const bool is_debug_frame = format == dwarf::DW_DEBUG_FRAME_FORMAT;
+  auto* cfi_section = (is_debug_frame ? builder->GetDebugFrame() : builder->GetEhFrame());
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr cfi_address = cfi_section->GetAddress();
+    const Elf_Addr cfi_address = (is_debug_frame ? 0 : cfi_section->GetAddress());
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
     std::vector<uint8_t> buffer;  // Small temporary buffer.
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index d599994..107ed48 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "art_field-inl.h"
+#include "code_item_accessors-inl.h"
 #include "debug/dwarf/debug_abbrev_writer.h"
 #include "debug/dwarf/debug_info_entry_writer.h"
 #include "debug/elf_compilation_unit.h"
@@ -48,10 +49,10 @@
 
 static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
   std::vector<const char*> names;
-  if (mi->code_item != nullptr) {
+  CodeItemDebugInfoAccessor accessor(mi->dex_file, mi->code_item);
+  if (accessor.HasCodeItem()) {
     DCHECK(mi->dex_file != nullptr);
-    uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*mi->dex_file, mi->code_item);
-    const uint8_t* stream = mi->dex_file->GetDebugInfoStream(debug_info_offset);
+    const uint8_t* stream = mi->dex_file->GetDebugInfoStream(accessor.DebugInfoOffset());
     if (stream != nullptr) {
       DecodeUnsignedLeb128(&stream);  // line.
       uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
@@ -162,7 +163,7 @@
     for (auto mi : compilation_unit.methods) {
       DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
-      const DexFile::CodeItem* dex_code = mi->code_item;
+      CodeItemDebugInfoAccessor accessor(dex, mi->code_item);
       const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index);
       const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
       const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
@@ -204,13 +205,13 @@
       // Decode dex register locations for all stack maps.
       // It might be expensive, so do it just once and reuse the result.
       std::vector<DexRegisterMap> dex_reg_maps;
-      if (dex_code != nullptr && mi->code_info != nullptr) {
+      if (accessor.HasCodeItem() && mi->code_info != nullptr) {
         const CodeInfo code_info(mi->code_info);
         CodeInfoEncoding encoding = code_info.ExtractEncoding();
         for (size_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) {
           const StackMap& stack_map = code_info.GetStackMapAt(s, encoding);
           dex_reg_maps.push_back(code_info.GetDexRegisterMapOf(
-              stack_map, encoding, dex_code->registers_size_));
+              stack_map, encoding, accessor.RegistersSize()));
         }
       }
 
@@ -224,9 +225,9 @@
         WriteName("this");
         info_.WriteFlagPresent(DW_AT_artificial);
         WriteLazyType(dex_class_desc);
-        if (dex_code != nullptr) {
+        if (accessor.HasCodeItem()) {
           // Write the stack location of the parameter.
-          const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+          const uint32_t vreg = accessor.RegistersSize() - accessor.InsSize() + arg_reg;
           const bool is64bitValue = false;
           WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
         }
@@ -244,30 +245,31 @@
           const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
           WriteLazyType(type_desc);
           const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
-          if (dex_code != nullptr) {
+          if (accessor.HasCodeItem()) {
             // Write the stack location of the parameter.
-            const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
+            const uint32_t vreg = accessor.RegistersSize() - accessor.InsSize() + arg_reg;
             WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
           }
           arg_reg += is64bitValue ? 2 : 1;
           info_.EndTag();
         }
-        if (dex_code != nullptr) {
-          DCHECK_EQ(arg_reg, dex_code->ins_size_);
+        if (accessor.HasCodeItem()) {
+          DCHECK_EQ(arg_reg, accessor.InsSize());
         }
       }
 
       // Write local variables.
       LocalInfos local_infos;
-      uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*dex, dex_code);
-      if (dex->DecodeDebugLocalInfo(dex_code,
-                                    debug_info_offset,
+      if (dex->DecodeDebugLocalInfo(accessor.RegistersSize(),
+                                    accessor.InsSize(),
+                                    accessor.InsnsSizeInCodeUnits(),
+                                    accessor.DebugInfoOffset(),
                                     is_static,
                                     mi->dex_method_index,
                                     LocalInfoCallback,
                                     &local_infos)) {
         for (const DexFile::LocalInfo& var : local_infos) {
-          if (var.reg_ < dex_code->registers_size_ - dex_code->ins_size_) {
+          if (var.reg_ < accessor.RegistersSize() - accessor.InsSize()) {
             info_.StartTag(DW_TAG_variable);
             WriteName(var.name_);
             WriteLazyType(var.descriptor_);
@@ -296,7 +298,7 @@
     CHECK_EQ(info_.Depth(), 0);
     std::vector<uint8_t> buffer;
     buffer.reserve(info_.data()->size() + KB);
-    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition();
     // All compilation units share single table which is at the start of .debug_abbrev.
     const size_t debug_abbrev_offset = 0;
     WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
@@ -461,7 +463,7 @@
     CHECK_EQ(info_.Depth(), 0);
     std::vector<uint8_t> buffer;
     buffer.reserve(info_.data()->size() + KB);
-    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition();
     // All compilation units share single table which is at the start of .debug_abbrev.
     const size_t debug_abbrev_offset = 0;
     WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index 943e03a..d7fd524 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -60,7 +60,7 @@
         ? builder_->GetText()->GetAddress()
         : 0;
 
-    compilation_unit.debug_line_offset = builder_->GetDebugLine()->GetSize();
+    compilation_unit.debug_line_offset = builder_->GetDebugLine()->GetPosition();
 
     std::vector<dwarf::FileEntry> files;
     std::unordered_map<std::string, size_t> files_map;
@@ -159,9 +159,9 @@
       PositionInfos dex2line_map;
       DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
-      uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*dex, mi->code_item);
-      if (!dex->DecodeDebugPositionInfo(
-              mi->code_item, debug_info_offset, PositionInfoCallback, &dex2line_map)) {
+      CodeItemDebugInfoAccessor accessor(dex, mi->code_item);
+      const uint32_t debug_info_offset = accessor.DebugInfoOffset();
+      if (!dex->DecodeDebugPositionInfo(debug_info_offset, PositionInfoCallback, &dex2line_map)) {
         continue;
       }
 
@@ -268,7 +268,7 @@
     }
     std::vector<uint8_t> buffer;
     buffer.reserve(opcodes.data()->size() + KB);
-    size_t offset = builder_->GetDebugLine()->GetSize();
+    size_t offset = builder_->GetDebugLine()->GetPosition();
     WriteDebugLineTable(directories, files, opcodes, offset, &buffer, &debug_line_patches_);
     builder_->GetDebugLine()->WriteFully(buffer.data(), buffer.size());
     return buffer.size();
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index bb856b2..1d609af 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -251,7 +251,10 @@
         // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind().
         // kInRegisterHigh and kInFpuRegisterHigh should be handled by
         // the special cases above and they should not occur alone.
-        LOG(ERROR) << "Unexpected register location kind: " << kind;
+        LOG(WARNING) << "Unexpected register location: " << kind
+                     << " (This can indicate either a bug in the dexer when generating"
+                     << " local variable information, or a bug in ART compiler."
+                     << " Please file a bug at go/art-bug)";
         break;
       }
       if (is64bitValue) {
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 33c46d7..a626729 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -108,29 +108,32 @@
 std::vector<uint8_t> MakeMiniDebugInfo(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    size_t rodata_size,
+    uint64_t text_address,
     size_t text_size,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
   if (Is64BitInstructionSet(isa)) {
     return MakeMiniDebugInfoInternal<ElfTypes64>(isa,
                                                  features,
-                                                 rodata_size,
+                                                 text_address,
                                                  text_size,
                                                  method_infos);
   } else {
     return MakeMiniDebugInfoInternal<ElfTypes32>(isa,
                                                  features,
-                                                 rodata_size,
+                                                 text_address,
                                                  text_size,
                                                  method_infos);
   }
 }
 
 template <typename ElfTypes>
-static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal(
+static std::vector<uint8_t> MakeElfFileForJITInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    const ArrayRef<const MethodDebugInfo>& method_infos) {
+    bool mini_debug_info,
+    const MethodDebugInfo& mi) {
+  CHECK_EQ(mi.is_code_address_text_relative, false);
+  ArrayRef<const MethodDebugInfo> method_infos(&mi, 1);
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   linker::VectorOutputStream out("Debug ELF file", &buffer);
@@ -138,23 +141,34 @@
       new linker::ElfBuilder<ElfTypes>(isa, features, &out));
   // No program headers since the ELF file is not linked and has no allocated sections.
   builder->Start(false /* write_program_headers */);
-  WriteDebugInfo(builder.get(),
-                 method_infos,
-                 dwarf::DW_DEBUG_FRAME_FORMAT,
-                 false /* write_oat_patches */);
+  if (mini_debug_info) {
+    std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa,
+                                                 features,
+                                                 mi.code_address,
+                                                 mi.code_size,
+                                                 method_infos);
+    builder->WriteSection(".gnu_debugdata", &mdi);
+  } else {
+    builder->GetText()->AllocateVirtualMemory(mi.code_address, mi.code_size);
+    WriteDebugInfo(builder.get(),
+                   method_infos,
+                   dwarf::DW_DEBUG_FRAME_FORMAT,
+                   false /* write_oat_patches */);
+  }
   builder->End();
   CHECK(builder->Good());
   return buffer;
 }
 
-std::vector<uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> MakeElfFileForJIT(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    const ArrayRef<const MethodDebugInfo>& method_infos) {
+    bool mini_debug_info,
+    const MethodDebugInfo& method_info) {
   if (Is64BitInstructionSet(isa)) {
-    return WriteDebugElfFileForMethodsInternal<ElfTypes64>(isa, features, method_infos);
+    return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_info);
   } else {
-    return WriteDebugElfFileForMethodsInternal<ElfTypes32>(isa, features, method_infos);
+    return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_info);
   }
 }
 
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index d24ca9b..a47bf07 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -43,14 +43,15 @@
 std::vector<uint8_t> MakeMiniDebugInfo(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    size_t rodata_section_size,
+    uint64_t text_section_address,
     size_t text_section_size,
     const ArrayRef<const MethodDebugInfo>& method_infos);
 
-std::vector<uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> MakeElfFileForJIT(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
+    bool mini_debug_info,
+    const MethodDebugInfo& method_info);
 
 std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h
index 1cdf6b0..78b8e27 100644
--- a/compiler/debug/elf_gnu_debugdata_writer.h
+++ b/compiler/debug/elf_gnu_debugdata_writer.h
@@ -80,7 +80,7 @@
 static std::vector<uint8_t> MakeMiniDebugInfoInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
-    size_t rodata_section_size,
+    typename ElfTypes::Addr text_section_address,
     size_t text_section_size,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
   std::vector<uint8_t> buffer;
@@ -88,11 +88,9 @@
   linker::VectorOutputStream out("Mini-debug-info ELF file", &buffer);
   std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder(
       new linker::ElfBuilder<ElfTypes>(isa, features, &out));
-  builder->Start();
-  // Mirror .rodata and .text as NOBITS sections.
-  // It is needed to detected relocations after compression.
-  builder->GetRoData()->WriteNoBitsSection(rodata_section_size);
-  builder->GetText()->WriteNoBitsSection(text_section_size);
+  builder->Start(false /* write_program_headers */);
+  // Mirror .text as NOBITS section since the added symbols will reference it.
+  builder->GetText()->AllocateVirtualMemory(text_section_address, text_section_size);
   WriteDebugSymbols(builder.get(), method_infos, false /* with_signature */);
   WriteCFISection(builder.get(),
                   method_infos,
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 0907e10..57e010f 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -79,8 +79,9 @@
       last_name_offset = name_offset;
     }
 
-    const auto* text = info.is_code_address_text_relative ? builder->GetText() : nullptr;
-    uint64_t address = info.code_address + (text != nullptr ? text->GetAddress() : 0);
+    const auto* text = builder->GetText();
+    uint64_t address = info.code_address;
+    address += info.is_code_address_text_relative ? text->GetAddress() : 0;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     address += CompiledMethod::CodeDelta(info.isa);
     symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC);
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index cc452fc..ead909a 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -16,11 +16,13 @@
 
 #include "dex_to_dex_compiler.h"
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG
+#include "base/macros.h"
 #include "base/mutex.h"
 #include "bytecode_utils.h"
 #include "compiled_method.h"
@@ -294,7 +296,6 @@
   ClassLinker* class_linker = unit_.GetClassLinker();
   ArtMethod* resolved_method =
       class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
-          GetDexFile(),
           method_idx,
           unit_.GetDexCache(),
           unit_.GetClassLoader(),
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 03c90d8..1e0b94d 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -16,7 +16,8 @@
 
 #include "verification_results.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "driver/compiler_driver.h"
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 524b0a6..8934201 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -19,7 +19,8 @@
 #include <algorithm>
 #include <memory>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "code_item_accessors-inl.h"
 #include "dex_file.h"
 #include "dex_instruction-inl.h"
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index c739333..c8c2b69 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -19,7 +19,8 @@
 
 #include "compiled_method_storage.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "compiled_method.h"
 #include "linker/linker_patch.h"
 #include "thread-current-inl.h"
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index de481ca..0769561 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -37,9 +37,6 @@
                         /* compiled_classes */ nullptr,
                         /* compiled_methods */ nullptr,
                         /* thread_count */ 1u,
-                        /* dump_stats */ false,
-                        /* dump_passes */ false,
-                        /* timer */ nullptr,
                         /* swap_fd */ -1,
                         /* profile_compilation_info */ nullptr);
   CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index b043929..294072d 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -32,14 +32,16 @@
 
 namespace art {
 
-inline mirror::Class* CompilerDriver::ResolveClass(
-    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-    Handle<mirror::ClassLoader> class_loader, dex::TypeIndex cls_index,
+inline ObjPtr<mirror::Class> CompilerDriver::ResolveClass(
+    const ScopedObjectAccess& soa,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    dex::TypeIndex cls_index,
     const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
-  mirror::Class* cls = mUnit->GetClassLinker()->ResolveType(
-      *mUnit->GetDexFile(), cls_index, dex_cache, class_loader);
+  ObjPtr<mirror::Class> cls =
+      mUnit->GetClassLinker()->ResolveType(cls_index, dex_cache, class_loader);
   DCHECK_EQ(cls == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(cls == nullptr)) {
     // Clean up any exception left by type resolution.
@@ -48,9 +50,11 @@
   return cls;
 }
 
-inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass(
-    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) {
+inline ObjPtr<mirror::Class> CompilerDriver::ResolveCompilingMethodsClass(
+    const ScopedObjectAccess& soa,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    const DexCompilationUnit* mUnit) {
   DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile());
   DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
   const DexFile::MethodId& referrer_method_id =
@@ -58,13 +62,13 @@
   return ResolveClass(soa, dex_cache, class_loader, referrer_method_id.class_idx_, mUnit);
 }
 
-inline ArtField* CompilerDriver::ResolveFieldWithDexFile(
-    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-    Handle<mirror::ClassLoader> class_loader, const DexFile* dex_file,
-    uint32_t field_idx, bool is_static) {
-  DCHECK_EQ(dex_cache->GetDexFile(), dex_file);
+inline ArtField* CompilerDriver::ResolveField(const ScopedObjectAccess& soa,
+                                              Handle<mirror::DexCache> dex_cache,
+                                              Handle<mirror::ClassLoader> class_loader,
+                                              uint32_t field_idx,
+                                              bool is_static) {
   ArtField* resolved_field = Runtime::Current()->GetClassLinker()->ResolveField(
-      *dex_file, field_idx, dex_cache, class_loader, is_static);
+      field_idx, dex_cache, class_loader, is_static);
   DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(resolved_field == nullptr)) {
     // Clean up any exception left by type resolution.
@@ -79,18 +83,11 @@
   return resolved_field;
 }
 
-inline ArtField* CompilerDriver::ResolveField(
-    const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-    Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
-    uint32_t field_idx, bool is_static) {
-  DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
-  return ResolveFieldWithDexFile(soa, dex_cache, class_loader, mUnit->GetDexFile(), field_idx,
-                                 is_static);
-}
-
 inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField(
-    mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    ArtField* resolved_field, uint16_t field_idx) {
+    ObjPtr<mirror::DexCache> dex_cache,
+    ObjPtr<mirror::Class> referrer_class,
+    ArtField* resolved_field,
+    uint16_t field_idx) {
   DCHECK(!resolved_field->IsStatic());
   ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass();
   bool fast_get = referrer_class != nullptr &&
@@ -112,7 +109,7 @@
   DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get());
   ArtMethod* resolved_method =
       mUnit->GetClassLinker()->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
-          *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
+          method_idx, dex_cache, class_loader, /* referrer */ nullptr, invoke_type);
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(soa.Self()->IsExceptionPending());
     // Clean up any exception left by type resolution.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fd7ae9f..0631c0f 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -32,6 +32,7 @@
 #include "base/array_ref.h"
 #include "base/bit_vector.h"
 #include "base/enums.h"
+#include "base/logging.h"  // For VLOG
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -282,9 +283,6 @@
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
     size_t thread_count,
-    bool dump_stats,
-    bool dump_passes,
-    CumulativeLogger* timer,
     int swap_fd,
     const ProfileCompilationInfo* profile_compilation_info)
     : compiler_options_(compiler_options),
@@ -303,9 +301,6 @@
       had_hard_verifier_failure_(false),
       parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
-      dump_stats_(dump_stats),
-      dump_passes_(dump_passes),
-      timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(true),
       compiled_method_storage_(swap_fd),
@@ -396,7 +391,7 @@
   if (GetCompilerOptions().IsAnyCompilationEnabled()) {
     Compile(class_loader, dex_files, timings);
   }
-  if (dump_stats_) {
+  if (GetCompilerOptions().GetDumpStats()) {
     stats_->Dump();
   }
 
@@ -707,7 +702,6 @@
 //       stable order.
 
 static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache,
-                                const DexFile& dex_file,
                                 const DexFile::CodeItem* code_item)
       REQUIRES_SHARED(Locks::mutator_lock_) {
   if (code_item == nullptr) {
@@ -723,7 +717,7 @@
         dex::StringIndex string_index((inst->Opcode() == Instruction::CONST_STRING)
             ? inst->VRegB_21c()
             : inst->VRegB_31c());
-        mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+        ObjPtr<mirror::String> string = class_linker->ResolveString(string_index, dex_cache);
         CHECK(string != nullptr) << "Could not allocate a string when forcing determinism";
         break;
       }
@@ -778,7 +772,7 @@
           continue;
         }
         previous_method_idx = method_idx;
-        ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem());
+        ResolveConstStrings(dex_cache, it.GetMethodCodeItem());
         it.Next();
       }
       DCHECK(!it.HasNext());
@@ -1054,22 +1048,21 @@
     for (const auto& exception_type : unresolved_exception_types) {
       dex::TypeIndex exception_type_idx = exception_type.first;
       const DexFile* dex_file = exception_type.second;
-      StackHandleScope<2> hs2(self);
+      StackHandleScope<1> hs2(self);
       Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file,
                                                                                      nullptr)));
-      Handle<mirror::Class> klass(hs2.NewHandle(
+      ObjPtr<mirror::Class> klass =
           (dex_cache != nullptr)
-              ? class_linker->ResolveType(*dex_file,
-                                          exception_type_idx,
+              ? class_linker->ResolveType(exception_type_idx,
                                           dex_cache,
                                           ScopedNullHandle<mirror::ClassLoader>())
-              : nullptr));
+              : nullptr;
       if (klass == nullptr) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
         LOG(FATAL) << "Failed to resolve class " << descriptor;
       }
-      DCHECK(java_lang_Throwable->IsAssignableFrom(klass.Get()));
+      DCHECK(java_lang_Throwable->IsAssignableFrom(klass));
     }
     // Resolving exceptions may load classes that reference more exceptions, iterate until no
     // more are found
@@ -1373,17 +1366,18 @@
 }
 
 ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
-                                                   const DexCompilationUnit* mUnit, bool is_put,
+                                                   const DexCompilationUnit* mUnit,
+                                                   bool is_put,
                                                    const ScopedObjectAccess& soa) {
   // Try to resolve the field and compiling method's class.
   ArtField* resolved_field;
-  mirror::Class* referrer_class;
+  ObjPtr<mirror::Class> referrer_class;
   Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache());
   {
-    Handle<mirror::ClassLoader> class_loader_handle = mUnit->GetClassLoader();
-    resolved_field = ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, false);
+    Handle<mirror::ClassLoader> class_loader = mUnit->GetClassLoader();
+    resolved_field = ResolveField(soa, dex_cache, class_loader, field_idx, /* is_static */ false);
     referrer_class = resolved_field != nullptr
-        ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr;
+        ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit) : nullptr;
   }
   bool can_link = false;
   if (resolved_field != nullptr && referrer_class != nullptr) {
@@ -1548,7 +1542,7 @@
 
 // A fast version of SkipClass above if the class pointer is available
 // that avoids the expensive FindInClassPath search.
-static bool SkipClass(jobject class_loader, const DexFile& dex_file, mirror::Class* klass)
+static bool SkipClass(jobject class_loader, const DexFile& dex_file, ObjPtr<mirror::Class> klass)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(klass != nullptr);
   const DexFile& original_dex_file = *klass->GetDexCache()->GetDexFile();
@@ -1616,7 +1610,7 @@
       : manager_(manager) {}
 
   void Visit(size_t class_def_index) OVERRIDE REQUIRES(!Locks::mutator_lock_) {
-    ATRACE_CALL();
+    ScopedTrace trace(__FUNCTION__);
     Thread* const self = Thread::Current();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -1642,8 +1636,8 @@
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
         soa.Self(), dex_file)));
     // Resolve the class.
-    mirror::Class* klass = class_linker->ResolveType(dex_file, class_def.class_idx_, dex_cache,
-                                                     class_loader);
+    ObjPtr<mirror::Class> klass =
+        class_linker->ResolveType(class_def.class_idx_, dex_cache, class_loader);
     bool resolve_fields_and_methods;
     if (klass == nullptr) {
       // Class couldn't be resolved, for example, super-class is in a different dex file. Don't
@@ -1669,8 +1663,8 @@
       ClassDataItemIterator it(dex_file, class_data);
       while (it.HasNextStaticField()) {
         if (resolve_fields_and_methods) {
-          ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
-                                                               dex_cache, class_loader, true);
+          ArtField* field = class_linker->ResolveField(
+              it.GetMemberIndex(), dex_cache, class_loader, /* is_static */ true);
           if (field == nullptr) {
             CheckAndClearResolveException(soa.Self());
           }
@@ -1684,8 +1678,8 @@
           requires_constructor_barrier = true;
         }
         if (resolve_fields_and_methods) {
-          ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
-                                                               dex_cache, class_loader, false);
+          ArtField* field = class_linker->ResolveField(
+              it.GetMemberIndex(), dex_cache, class_loader, /* is_static */ false);
           if (field == nullptr) {
             CheckAndClearResolveException(soa.Self());
           }
@@ -1695,7 +1689,10 @@
       if (resolve_fields_and_methods) {
         while (it.HasNextMethod()) {
           ArtMethod* method = class_linker->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-              dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
+              it.GetMemberIndex(),
+              dex_cache,
+              class_loader,
+              /* referrer */ nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
             CheckAndClearResolveException(soa.Self());
@@ -1731,7 +1728,7 @@
         dex_file,
         class_loader.Get())));
     ObjPtr<mirror::Class> klass = (dex_cache != nullptr)
-        ? class_linker->ResolveType(dex_file, dex::TypeIndex(type_idx), dex_cache, class_loader)
+        ? class_linker->ResolveType(dex::TypeIndex(type_idx), dex_cache, class_loader)
         : nullptr;
 
     if (klass == nullptr) {
@@ -1961,7 +1958,7 @@
      : manager_(manager), log_level_(log_level) {}
 
   virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
-    ATRACE_CALL();
+    ScopedTrace trace(__FUNCTION__);
     ScopedObjectAccess soa(Thread::Current());
     const DexFile& dex_file = *manager_->GetDexFile();
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
@@ -2090,7 +2087,7 @@
   explicit SetVerifiedClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
   virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
-    ATRACE_CALL();
+    ScopedTrace trace(__FUNCTION__);
     ScopedObjectAccess soa(Thread::Current());
     const DexFile& dex_file = *manager_->GetDexFile();
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
@@ -2154,7 +2151,7 @@
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
   void Visit(size_t class_def_index) OVERRIDE {
-    ATRACE_CALL();
+    ScopedTrace trace(__FUNCTION__);
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
@@ -2334,22 +2331,20 @@
     DCHECK(!klass->IsInitialized());
 
     StackHandleScope<1> hs(Thread::Current());
-    Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
-    const DexFile* dex_file = manager_->GetDexFile();
+    Handle<mirror::DexCache> dex_cache = hs.NewHandle(klass->GetDexCache());
     const DexFile::ClassDef* class_def = klass->GetClassDef();
     ClassLinker* class_linker = manager_->GetClassLinker();
 
     // Check encoded final field values for strings and intern.
-    annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
-                                                                 &h_dex_cache,
-                                                                 &class_loader,
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(dex_cache,
+                                                                 class_loader,
                                                                  manager_->GetClassLinker(),
                                                                  *class_def);
     for ( ; value_it.HasNext(); value_it.Next()) {
       if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) {
         // Resolve the string. This will intern the string.
         art::ObjPtr<mirror::String> resolved = class_linker->ResolveString(
-            *dex_file, dex::StringIndex(value_it.GetJavaValue().i), h_dex_cache);
+            dex::StringIndex(value_it.GetJavaValue().i), dex_cache);
         CHECK(resolved != nullptr);
       }
     }
@@ -2362,11 +2357,11 @@
       for (const DexInstructionPcPair& inst : code_item->Instructions()) {
         if (inst->Opcode() == Instruction::CONST_STRING) {
           ObjPtr<mirror::String> s = class_linker->ResolveString(
-              *dex_file, dex::StringIndex(inst->VRegB_21c()), h_dex_cache);
+              dex::StringIndex(inst->VRegB_21c()), dex_cache);
           CHECK(s != nullptr);
         } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) {
           ObjPtr<mirror::String> s = class_linker->ResolveString(
-              *dex_file, dex::StringIndex(inst->VRegB_31c()), h_dex_cache);
+              dex::StringIndex(inst->VRegB_31c()), dex_cache);
           CHECK(s != nullptr);
         }
       }
@@ -2670,7 +2665,7 @@
   explicit CompileClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
   virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
-    ATRACE_CALL();
+    ScopedTrace trace(__FUNCTION__);
     const DexFile& dex_file = *manager_->GetDexFile();
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
     ClassLinker* class_linker = manager_->GetClassLinker();
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index da4a580..e001726 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,9 +97,6 @@
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count,
-                 bool dump_stats,
-                 bool dump_passes,
-                 CumulativeLogger* timer,
                  int swap_fd,
                  const ProfileCompilationInfo* profile_compilation_info);
 
@@ -222,36 +219,33 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve compiling method's class. Returns null on failure.
-  mirror::Class* ResolveCompilingMethodsClass(
-      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit)
+  ObjPtr<mirror::Class> ResolveCompilingMethodsClass(const ScopedObjectAccess& soa,
+                                                     Handle<mirror::DexCache> dex_cache,
+                                                     Handle<mirror::ClassLoader> class_loader,
+                                                     const DexCompilationUnit* mUnit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::Class* ResolveClass(
-      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-      Handle<mirror::ClassLoader> class_loader, dex::TypeIndex type_index,
-      const DexCompilationUnit* mUnit)
+  ObjPtr<mirror::Class> ResolveClass(const ScopedObjectAccess& soa,
+                                     Handle<mirror::DexCache> dex_cache,
+                                     Handle<mirror::ClassLoader> class_loader,
+                                     dex::TypeIndex type_index,
+                                     const DexCompilationUnit* mUnit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a field. Returns null on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
-  ArtField* ResolveField(
-      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-      Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
-      uint32_t field_idx, bool is_static)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Resolve a field with a given dex file.
-  ArtField* ResolveFieldWithDexFile(
-      const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
-      Handle<mirror::ClassLoader> class_loader, const DexFile* dex_file,
-      uint32_t field_idx, bool is_static)
+  ArtField* ResolveField(const ScopedObjectAccess& soa,
+                         Handle<mirror::DexCache> dex_cache,
+                         Handle<mirror::ClassLoader> class_loader,
+                         uint32_t field_idx,
+                         bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
-  std::pair<bool, bool> IsFastInstanceField(
-      mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      ArtField* resolved_field, uint16_t field_idx)
+  std::pair<bool, bool> IsFastInstanceField(ObjPtr<mirror::DexCache> dex_cache,
+                                            ObjPtr<mirror::Class> referrer_class,
+                                            ArtField* resolved_field,
+                                            uint16_t field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a method. Returns null on failure, including incompatible class change.
@@ -273,9 +267,9 @@
       REQUIRES(!Locks::mutator_lock_);
 
   ArtField* ComputeInstanceFieldInfo(uint32_t field_idx,
-                                             const DexCompilationUnit* mUnit,
-                                             bool is_put,
-                                             const ScopedObjectAccess& soa)
+                                     const DexCompilationUnit* mUnit,
+                                     bool is_put,
+                                     const ScopedObjectAccess& soa)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
 
@@ -302,18 +296,6 @@
     return parallel_thread_count_;
   }
 
-  bool GetDumpStats() const {
-    return dump_stats_;
-  }
-
-  bool GetDumpPasses() const {
-    return dump_passes_;
-  }
-
-  CumulativeLogger* GetTimingsLogger() const {
-    return timings_logger_;
-  }
-
   void SetDedupeEnabled(bool dedupe_enabled) {
     compiled_method_storage_.SetDedupeEnabled(dedupe_enabled);
   }
@@ -536,11 +518,6 @@
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
 
-  bool dump_stats_;
-  const bool dump_passes_;
-
-  CumulativeLogger* const timings_logger_;
-
   typedef void (*CompilerCallbackFn)(CompilerDriver& driver);
   typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver);
 
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 032763c..c0a9a05 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -49,6 +49,8 @@
       implicit_so_checks_(true),
       implicit_suspend_checks_(false),
       compile_pic_(false),
+      dump_timings_(false),
+      dump_stats_(false),
       verbose_methods_(),
       abort_on_hard_verifier_failure_(false),
       abort_on_soft_verifier_failure_(false),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index a71f61a..3f66029 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -266,6 +266,14 @@
     return passes_to_run_;
   }
 
+  bool GetDumpTimings() const {  // Whether --dump-timings was requested; defaults to false.
+    return dump_timings_;
+  }
+
+  bool GetDumpStats() const {  // Whether --dump-stats was requested; defaults to false.
+    return dump_stats_;
+  }
+
  private:
   bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
@@ -303,6 +311,8 @@
   bool implicit_so_checks_;
   bool implicit_suspend_checks_;
   bool compile_pic_;
+  bool dump_timings_;
+  bool dump_stats_;
 
   // Vector of methods to have verbose output enabled for.
   std::vector<std::string> verbose_methods_;
diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h
index e28d499..f97ab08 100644
--- a/compiler/driver/compiler_options_map-inl.h
+++ b/compiler/driver/compiler_options_map-inl.h
@@ -78,6 +78,14 @@
   map.AssignIfExists(Base::VerboseMethods, &options->verbose_methods_);
   options->deduplicate_code_ = map.GetOrDefault(Base::DeduplicateCode);
 
+  if (map.Exists(Base::DumpTimings)) {
+    options->dump_timings_ = true;
+  }
+
+  if (map.Exists(Base::DumpStats)) {
+    options->dump_stats_ = true;
+  }
+
   return true;
 }
 
@@ -129,6 +137,12 @@
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(Map::DeduplicateCode)
 
+      .Define({"--dump-timings"})
+          .IntoKey(Map::DumpTimings)
+
+      .Define({"--dump-stats"})
+          .IntoKey(Map::DumpStats)
+
       .Define("--debuggable")
           .IntoKey(Map::Debuggable)
 
diff --git a/compiler/driver/compiler_options_map.def b/compiler/driver/compiler_options_map.def
index cccd618..2c56fd7 100644
--- a/compiler/driver/compiler_options_map.def
+++ b/compiler/driver/compiler_options_map.def
@@ -58,5 +58,7 @@
 COMPILER_OPTIONS_KEY (std::string,                 RegisterAllocationStrategy)
 COMPILER_OPTIONS_KEY (ParseStringList<','>,        VerboseMethods)
 COMPILER_OPTIONS_KEY (bool,                        DeduplicateCode,        true)
+COMPILER_OPTIONS_KEY (Unit,                        DumpTimings)
+COMPILER_OPTIONS_KEY (Unit,                        DumpStats)
 
 #undef COMPILER_OPTIONS_KEY
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 0c82d60..88e3e5b 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -21,7 +21,9 @@
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/logging.h"  // For VLOG
 #include "base/stringpiece.h"
+#include "base/systrace.h"
 #include "base/time_utils.h"
 #include "base/timing_logger.h"
 #include "base/unix_file/fd_file.h"
@@ -130,7 +132,6 @@
   if (instruction_set_features_ == nullptr) {
     instruction_set_features_ = InstructionSetFeatures::FromCppDefines();
   }
-  cumulative_logger_.reset(new CumulativeLogger("jit times"));
   compiler_driver_.reset(new CompilerDriver(
       compiler_options_.get(),
       /* verification_results */ nullptr,
@@ -141,9 +142,6 @@
       /* compiled_classes */ nullptr,
       /* compiled_methods */ nullptr,
       /* thread_count */ 1,
-      /* dump_stats */ false,
-      /* dump_passes */ false,
-      cumulative_logger_.get(),
       /* swap_fd */ -1,
       /* profile_compilation_info */ nullptr));
   // Disable dedupe so we can remove compiled methods.
@@ -166,6 +164,8 @@
 }
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
+  SCOPED_TRACE << "JIT compiling " << method->PrettyMethod();
+
   DCHECK(!method->IsProxyMethod());
   DCHECK(method->GetDeclaringClass()->IsResolved());
 
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 1e1838e..31dc9e2 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -48,7 +48,6 @@
 
  private:
   std::unique_ptr<CompilerOptions> compiler_options_;
-  std::unique_ptr<CumulativeLogger> cumulative_logger_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
   std::unique_ptr<JitLogger> jit_logger_;
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 3e637bc..54f193b 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -16,7 +16,9 @@
 
 #include "calling_convention_arm.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "handle_scope-inl.h"
 #include "utils/arm/managed_register_arm.h"
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 3afd701..328ecbb 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -16,7 +16,8 @@
 
 #include "calling_convention_arm64.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "handle_scope-inl.h"
 #include "utils/arm64/managed_register_arm64.h"
 
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 55c27d1..ff814c8 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -16,7 +16,7 @@
 
 #include "calling_convention.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "jni/quick/arm/calling_convention_arm.h"
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b93b05c..136e3db 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -25,11 +25,10 @@
 #include "art_method.h"
 #include "base/arena_allocator.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "calling_convention.h"
 #include "class_linker.h"
-#include "compiled_method.h"
 #include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
@@ -115,10 +114,10 @@
 //   convention.
 //
 template <PointerSize kPointerSize>
-static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
-                                                   uint32_t access_flags,
-                                                   uint32_t method_idx,
-                                                   const DexFile& dex_file) {
+static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver,
+                                                     uint32_t access_flags,
+                                                     uint32_t method_idx,
+                                                     const DexFile& dex_file) {
   const bool is_native = (access_flags & kAccNative) != 0;
   CHECK(is_native);
   const bool is_static = (access_flags & kAccStatic) != 0;
@@ -657,16 +656,12 @@
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
 
-  return CompiledMethod::SwapAllocCompiledMethod(driver,
-                                                 instruction_set,
-                                                 ArrayRef<const uint8_t>(managed_code),
-                                                 frame_size,
-                                                 main_jni_conv->CoreSpillMask(),
-                                                 main_jni_conv->FpSpillMask(),
-                                                 /* method_info */ ArrayRef<const uint8_t>(),
-                                                 /* vmap_table */ ArrayRef<const uint8_t>(),
-                                                 ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
-                                                 ArrayRef<const linker::LinkerPatch>());
+  return JniCompiledMethod(instruction_set,
+                           std::move(managed_code),
+                           frame_size,
+                           main_jni_conv->CoreSpillMask(),
+                           main_jni_conv->FpSpillMask(),
+                           ArrayRef<const uint8_t>(*jni_asm->cfi().data()));
 }
 
 // Copy a single parameter from the managed to the JNI calling convention.
@@ -775,10 +770,10 @@
   }
 }
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
-                                         uint32_t access_flags,
-                                         uint32_t method_idx,
-                                         const DexFile& dex_file) {
+JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                           uint32_t access_flags,
+                                           uint32_t method_idx,
+                                           const DexFile& dex_file) {
   if (Is64BitInstructionSet(compiler->GetInstructionSet())) {
     return ArtJniCompileMethodInternal<PointerSize::k64>(
         compiler, access_flags, method_idx, dex_file);
diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h
index 3fcce55..1141994 100644
--- a/compiler/jni/quick/jni_compiler.h
+++ b/compiler/jni/quick/jni_compiler.h
@@ -17,18 +17,55 @@
 #ifndef ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 #define ART_COMPILER_JNI_QUICK_JNI_COMPILER_H_
 
-#include "compiler.h"
-#include "dex_file.h"
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "base/array_ref.h"
 
 namespace art {
 
+class ArtMethod;
 class CompilerDriver;
-class CompiledMethod;
+class DexFile;
 
-CompiledMethod* ArtQuickJniCompileMethod(CompilerDriver* compiler,
-                                         uint32_t access_flags,
-                                         uint32_t method_idx,
-                                         const DexFile& dex_file);
+class JniCompiledMethod {  // Value type returned by ArtQuickJniCompileMethod().
+ public:
+  JniCompiledMethod(InstructionSet instruction_set,
+                    std::vector<uint8_t>&& code,  // Moved into code_ (no copy).
+                    uint32_t frame_size,
+                    uint32_t core_spill_mask,
+                    uint32_t fp_spill_mask,
+                    ArrayRef<const uint8_t> cfi)  // Bytes are copied into cfi_.
+      : instruction_set_(instruction_set),
+        code_(std::move(code)),
+        frame_size_(frame_size),
+        core_spill_mask_(core_spill_mask),
+        fp_spill_mask_(fp_spill_mask),
+        cfi_(cfi.begin(), cfi.end()) {}
+
+  JniCompiledMethod(JniCompiledMethod&& other) = default;  // Movable, not copyable.
+  ~JniCompiledMethod() = default;
+
+  InstructionSet GetInstructionSet() const { return instruction_set_; }
+  ArrayRef<const uint8_t> GetCode() const { return ArrayRef<const uint8_t>(code_); }
+  uint32_t GetFrameSize() const { return frame_size_; }
+  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
+  uint32_t GetFpSpillMask() const { return fp_spill_mask_; }
+  ArrayRef<const uint8_t> GetCfi() const { return ArrayRef<const uint8_t>(cfi_); }
+
+ private:
+  InstructionSet instruction_set_;
+  std::vector<uint8_t> code_;  // Owned code bytes; GetCode() exposes them as an ArrayRef.
+  uint32_t frame_size_;
+  uint32_t core_spill_mask_;
+  uint32_t fp_spill_mask_;
+  std::vector<uint8_t> cfi_;  // Owned copy of the CFI bytes; GetCfi() exposes them.
+};
+
+JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler,
+                                           uint32_t access_flags,
+                                           uint32_t method_idx,
+                                           const DexFile& dex_file);
 
 }  // namespace art
 
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index 0e0716e..5ec1add 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -16,7 +16,8 @@
 
 #include "calling_convention_mips.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "handle_scope-inl.h"
 #include "utils/mips/managed_register_mips.h"
 
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index afe6a76..a7012ae 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -16,7 +16,8 @@
 
 #include "calling_convention_mips64.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "handle_scope-inl.h"
 #include "utils/mips64/managed_register_mips64.h"
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 0bfcc3f..ad58e38 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -16,7 +16,8 @@
 
 #include "calling_convention_x86.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "handle_scope-inl.h"
 #include "utils/x86/managed_register_x86.h"
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index ba654f4..e5e96d0 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -16,8 +16,9 @@
 
 #include "calling_convention_x86_64.h"
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "handle_scope-inl.h"
 #include "utils/x86_64/managed_register_x86_64.h"
 
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 48747fc..7875517 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -16,6 +16,8 @@
 
 #include "linker/arm/relative_patcher_thumb2.h"
 
+#include <sstream>
+
 #include "arch/arm/asm_support_arm.h"
 #include "art_method.h"
 #include "base/bit_utils.h"
diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h
index b30b55e..aa3cd98 100644
--- a/compiler/linker/elf_builder.h
+++ b/compiler/linker/elf_builder.h
@@ -108,8 +108,6 @@
           section_index_(0),
           name_(name),
           link_(link),
-          started_(false),
-          finished_(false),
           phdr_flags_(PF_R),
           phdr_type_(0) {
       DCHECK_GE(align, 1u);
@@ -120,90 +118,62 @@
       header_.sh_entsize = entsize;
     }
 
-    // Start writing of this section.
-    void Start() {
-      CHECK(!started_);
-      CHECK(!finished_);
-      started_ = true;
-      auto& sections = owner_->sections_;
-      // Check that the previous section is complete.
-      CHECK(sections.empty() || sections.back()->finished_);
-      // The first ELF section index is 1. Index 0 is reserved for NULL.
-      section_index_ = sections.size() + 1;
-      // Page-align if we switch between allocated and non-allocated sections,
-      // or if we change the type of allocation (e.g. executable vs non-executable).
-      if (!sections.empty()) {
-        if (header_.sh_flags != sections.back()->header_.sh_flags) {
-          header_.sh_addralign = kPageSize;
-        }
-      }
-      // Align file position.
-      if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = owner_->AlignFileOffset(header_.sh_addralign);
-      } else {
-        header_.sh_offset = 0;
-      }
-      // Align virtual memory address.
-      if ((header_.sh_flags & SHF_ALLOC) != 0) {
-        header_.sh_addr = owner_->AlignVirtualAddress(header_.sh_addralign);
-      } else {
-        header_.sh_addr = 0;
-      }
-      // Push this section on the list of written sections.
-      sections.push_back(this);
+    // Allocate a chunk of virtual memory for this section from the owning ElfBuilder.
+    // This must be done at the start for all SHF_ALLOC sections (i.e. mmapped by the linker).
+    // It is fine to allocate a section but never call Start/End() (e.g. the .bss section).
+    void AllocateVirtualMemory(Elf_Word size) {
+      AllocateVirtualMemory(owner_->virtual_address_, size);
     }
 
-    // Finish writing of this section.
+    void AllocateVirtualMemory(Elf_Addr addr, Elf_Word size) {
+      CHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
+      Elf_Word align = AddSection();
+      CHECK_EQ(header_.sh_addr, 0u);
+      header_.sh_addr = RoundUp(addr, align);
+      CHECK(header_.sh_size == 0u || header_.sh_size == size);
+      header_.sh_size = size;
+      CHECK_LE(owner_->virtual_address_, header_.sh_addr);
+      owner_->virtual_address_ = header_.sh_addr + header_.sh_size;
+    }
+
+    // Start writing file data of this section.
+    void Start() {
+      CHECK(owner_->current_section_ == nullptr);
+      Elf_Word align = AddSection();
+      CHECK_EQ(header_.sh_offset, 0u);
+      header_.sh_offset = owner_->AlignFileOffset(align);
+      owner_->current_section_ = this;
+    }
+
+    // Finish writing file data of this section.
     void End() {
-      CHECK(started_);
-      CHECK(!finished_);
-      finished_ = true;
-      if (header_.sh_type == SHT_NOBITS) {
-        CHECK_GT(header_.sh_size, 0u);
-      } else {
-        // Use the current file position to determine section size.
-        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
-        CHECK_GE(file_offset, (off_t)header_.sh_offset);
-        header_.sh_size = file_offset - header_.sh_offset;
-      }
-      if ((header_.sh_flags & SHF_ALLOC) != 0) {
-        owner_->virtual_address_ += header_.sh_size;
-      }
+      CHECK(owner_->current_section_ == this);
+      Elf_Word position = GetPosition();
+      CHECK(header_.sh_size == 0u || header_.sh_size == position);
+      header_.sh_size = position;
+      owner_->current_section_ = nullptr;
+    }
+
+    // Get the number of bytes written so far.
+    // Only valid while writing the section.
+    Elf_Word GetPosition() const {
+      CHECK(owner_->current_section_ == this);
+      off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
+      DCHECK_GE(file_offset, (off_t)header_.sh_offset);
+      return file_offset - header_.sh_offset;
     }
 
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
-      CHECK(started_);
-      return header_.sh_addr;
-    }
-
-    // Returns the size of the content of this section.
-    Elf_Word GetSize() const {
-      if (finished_) {
-        return header_.sh_size;
-      } else {
-        CHECK(started_);
-        CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
-        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
-      }
-    }
-
-    // Write this section as "NOBITS" section. (used for the .bss section)
-    // This means that the ELF file does not contain the initial data for this section
-    // and it will be zero-initialized when the ELF file is loaded in the running program.
-    void WriteNoBitsSection(Elf_Word size) {
       DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u);
-      header_.sh_type = SHT_NOBITS;
-      Start();
-      header_.sh_size = size;
-      End();
+      DCHECK_NE(header_.sh_addr, 0u);
+      return header_.sh_addr;
     }
 
     // This function always succeeds to simplify code.
     // Use builder's Good() to check the actual status.
     bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
-      CHECK(started_);
-      CHECK(!finished_);
+      CHECK(owner_->current_section_ == this);
       return owner_->stream_.WriteFully(buffer, byte_count);
     }
 
@@ -221,19 +191,32 @@
     }
 
     Elf_Word GetSectionIndex() const {
-      DCHECK(started_);
       DCHECK_NE(section_index_, 0u);
       return section_index_;
     }
 
    private:
+    // Add this section to the list of generated ELF sections (if not there already).
+    // It also ensures the alignment is sufficient to generate valid program headers,
+    // since that depends on the previous section. It returns the required alignment.
+    Elf_Word AddSection() {
+      if (section_index_ == 0) {
+        std::vector<Section*>& sections = owner_->sections_;
+        Elf_Word last = sections.empty() ? PF_R : sections.back()->phdr_flags_;
+        if (phdr_flags_ != last) {
+          header_.sh_addralign = kPageSize;  // Page-align if R/W/X flags changed.
+        }
+        sections.push_back(this);
+        section_index_ = sections.size();  // First ELF section has index 1.
+      }
+      return owner_->write_program_headers_ ? header_.sh_addralign : 1;
+    }
+
     ElfBuilder<ElfTypes>* owner_;
     Elf_Shdr header_;
     Elf_Word section_index_;
     const std::string name_;
     const Section* const link_;
-    bool started_;
-    bool finished_;
     Elf_Word phdr_flags_;
     Elf_Word phdr_type_;
 
@@ -370,7 +353,7 @@
       Elf_Word section_index;
       if (section != nullptr) {
         DCHECK_LE(section->GetAddress(), addr);
-        DCHECK_LE(addr, section->GetAddress() + section->GetSize());
+        DCHECK_LE(addr, section->GetAddress() + section->header_.sh_size);
         section_index = section->GetSectionIndex();
       } else {
         section_index = static_cast<Elf_Word>(SHN_ABS);
@@ -479,6 +462,10 @@
           digest_start_(-1) {
     }
 
+    Elf_Word GetSize() {
+      return 16 + kBuildIdLen;
+    }
+
     void Write() {
       // The size fields are 32-bit on both 32-bit and 64-bit systems, confirmed
       // with the 64-bit linker and libbfd code. The size of name and desc must
@@ -490,6 +477,7 @@
       digest_start_ = this->Seek(0, kSeekCurrent);
       static_assert(kBuildIdLen % 4 == 0, "expecting a mutliple of 4 for build ID length");
       this->WriteFully(std::string(kBuildIdLen, '\0').c_str(), kBuildIdLen);  // desc.
+      DCHECK_EQ(this->GetPosition(), GetSize());
     }
 
     off_t GetDigestStart() {
@@ -530,6 +518,7 @@
         abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
                   isa, features),
         build_id_(this, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, nullptr, 0, 4, 0),
+        current_section_(nullptr),
         started_(false),
         write_program_headers_(false),
         loaded_size_(0u),
@@ -545,6 +534,7 @@
   ~ElfBuilder() {}
 
   InstructionSet GetIsa() { return isa_; }
+  BuildIdSection* GetBuildId() { return &build_id_; }
   Section* GetRoData() { return &rodata_; }
   Section* GetText() { return &text_; }
   Section* GetBss() { return &bss_; }
@@ -622,6 +612,9 @@
       if (section->link_ != nullptr) {
         section->header_.sh_link = section->link_->GetSectionIndex();
       }
+      if (section->header_.sh_offset == 0) {
+        section->header_.sh_type = SHT_NOBITS;
+      }
     }
     shstrtab_.End();
 
@@ -680,65 +673,57 @@
       soname = soname.substr(directory_separator_pos + 1);
     }
 
-    // Calculate addresses of .text, .bss and .dynstr.
-    DCHECK_EQ(rodata_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
-    DCHECK_EQ(text_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
-    DCHECK_EQ(bss_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
-    DCHECK_EQ(dynstr_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
-    Elf_Word rodata_address = rodata_.GetAddress();
-    Elf_Word text_address = RoundUp(rodata_address + rodata_size, kPageSize);
-    Elf_Word bss_address = RoundUp(text_address + text_size, kPageSize);
-    Elf_Word abiflags_address = RoundUp(bss_address + bss_size, kPageSize);
-    Elf_Word abiflags_size = 0;
-    if (isa_ == InstructionSet::kMips || isa_ == InstructionSet::kMips64) {
-      abiflags_size = abiflags_.GetSize();
+    // Allocate all pre-dynamic sections.
+    rodata_.AllocateVirtualMemory(rodata_size);
+    text_.AllocateVirtualMemory(text_size);
+    if (bss_size != 0) {
+      bss_.AllocateVirtualMemory(bss_size);
     }
-    Elf_Word dynstr_address = RoundUp(abiflags_address + abiflags_size, kPageSize);
+    if (isa_ == InstructionSet::kMips || isa_ == InstructionSet::kMips64) {
+      abiflags_.AllocateVirtualMemory(abiflags_.GetSize());
+    }
 
     // Cache .dynstr, .dynsym and .hash data.
     dynstr_.Add("");  // dynstr should start with empty string.
-    Elf_Word rodata_index = rodata_.GetSectionIndex();
     Elf_Word oatdata = dynstr_.Add("oatdata");
-    dynsym_.Add(oatdata, rodata_index, rodata_address, rodata_size, STB_GLOBAL, STT_OBJECT);
+    dynsym_.Add(oatdata, &rodata_, rodata_.GetAddress(), rodata_size, STB_GLOBAL, STT_OBJECT);
     if (text_size != 0u) {
-      Elf_Word text_index = rodata_index + 1u;
       Elf_Word oatexec = dynstr_.Add("oatexec");
-      dynsym_.Add(oatexec, text_index, text_address, text_size, STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(oatexec, &text_, text_.GetAddress(), text_size, STB_GLOBAL, STT_OBJECT);
       Elf_Word oatlastword = dynstr_.Add("oatlastword");
-      Elf_Word oatlastword_address = text_address + text_size - 4;
-      dynsym_.Add(oatlastword, text_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
+      Elf_Word oatlastword_address = text_.GetAddress() + text_size - 4;
+      dynsym_.Add(oatlastword, &text_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
     } else if (rodata_size != 0) {
       // rodata_ can be size 0 for dwarf_test.
       Elf_Word oatlastword = dynstr_.Add("oatlastword");
-      Elf_Word oatlastword_address = rodata_address + rodata_size - 4;
-      dynsym_.Add(oatlastword, rodata_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
+      Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4;
+      dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT);
     }
     DCHECK_LE(bss_roots_offset, bss_size);
     if (bss_size != 0u) {
-      Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u);
       Elf_Word oatbss = dynstr_.Add("oatbss");
-      dynsym_.Add(oatbss, bss_index, bss_address, bss_roots_offset, STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(oatbss, &bss_, bss_.GetAddress(), bss_roots_offset, STB_GLOBAL, STT_OBJECT);
       DCHECK_LE(bss_methods_offset, bss_roots_offset);
       DCHECK_LE(bss_roots_offset, bss_size);
       // Add a symbol marking the start of the methods part of the .bss, if not empty.
       if (bss_methods_offset != bss_roots_offset) {
-        Elf_Word bss_methods_address = bss_address + bss_methods_offset;
+        Elf_Word bss_methods_address = bss_.GetAddress() + bss_methods_offset;
         Elf_Word bss_methods_size = bss_roots_offset - bss_methods_offset;
         Elf_Word oatbssroots = dynstr_.Add("oatbssmethods");
         dynsym_.Add(
-            oatbssroots, bss_index, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT);
+            oatbssroots, &bss_, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT);
       }
       // Add a symbol marking the start of the GC roots part of the .bss, if not empty.
       if (bss_roots_offset != bss_size) {
-        Elf_Word bss_roots_address = bss_address + bss_roots_offset;
+        Elf_Word bss_roots_address = bss_.GetAddress() + bss_roots_offset;
         Elf_Word bss_roots_size = bss_size - bss_roots_offset;
         Elf_Word oatbssroots = dynstr_.Add("oatbssroots");
         dynsym_.Add(
-            oatbssroots, bss_index, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT);
+            oatbssroots, &bss_, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT);
       }
       Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword");
-      Elf_Word bsslastword_address = bss_address + bss_size - 4;
-      dynsym_.Add(oatbsslastword, bss_index, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT);
+      Elf_Word bsslastword_address = bss_.GetAddress() + bss_size - 4;
+      dynsym_.Add(oatbsslastword, &bss_, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT);
     }
     Elf_Word soname_offset = dynstr_.Add(soname);
 
@@ -759,28 +744,24 @@
     hash.push_back(0);  // Last symbol terminates the chain.
     hash_.Add(hash.data(), hash.size() * sizeof(hash[0]));
 
-    // Calculate addresses of .dynsym, .hash and .dynamic.
-    DCHECK_EQ(dynstr_.header_.sh_flags, dynsym_.header_.sh_flags);
-    DCHECK_EQ(dynsym_.header_.sh_flags, hash_.header_.sh_flags);
-    Elf_Word dynsym_address =
-        RoundUp(dynstr_address + dynstr_.GetCacheSize(), dynsym_.header_.sh_addralign);
-    Elf_Word hash_address =
-        RoundUp(dynsym_address + dynsym_.GetCacheSize(), hash_.header_.sh_addralign);
-    DCHECK_EQ(dynamic_.header_.sh_addralign, static_cast<Elf_Word>(kPageSize));
-    Elf_Word dynamic_address = RoundUp(hash_address + dynsym_.GetCacheSize(), kPageSize);
+    // Allocate all remaining sections.
+    dynstr_.AllocateVirtualMemory(dynstr_.GetCacheSize());
+    dynsym_.AllocateVirtualMemory(dynsym_.GetCacheSize());
+    hash_.AllocateVirtualMemory(hash_.GetCacheSize());
 
     Elf_Dyn dyns[] = {
-      { DT_HASH, { hash_address } },
-      { DT_STRTAB, { dynstr_address } },
-      { DT_SYMTAB, { dynsym_address } },
+      { DT_HASH, { hash_.GetAddress() } },
+      { DT_STRTAB, { dynstr_.GetAddress() } },
+      { DT_SYMTAB, { dynsym_.GetAddress() } },
       { DT_SYMENT, { sizeof(Elf_Sym) } },
       { DT_STRSZ, { dynstr_.GetCacheSize() } },
       { DT_SONAME, { soname_offset } },
       { DT_NULL, { 0 } },
     };
     dynamic_.Add(&dyns, sizeof(dyns));
+    dynamic_.AllocateVirtualMemory(dynamic_.GetCacheSize());
 
-    loaded_size_ = RoundUp(dynamic_address + dynamic_.GetCacheSize(), kPageSize);
+    loaded_size_ = RoundUp(virtual_address_, kPageSize);
   }
 
   void WriteDynamicSection() {
@@ -788,8 +769,6 @@
     dynsym_.WriteCachedSection();
     hash_.WriteCachedSection();
     dynamic_.WriteCachedSection();
-
-    CHECK_EQ(loaded_size_, RoundUp(dynamic_.GetAddress() + dynamic_.GetSize(), kPageSize));
   }
 
   Elf_Word GetLoadedSize() {
@@ -828,10 +807,6 @@
      return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet);
   }
 
-  Elf_Addr AlignVirtualAddress(size_t alignment) {
-     return virtual_address_ = RoundUp(virtual_address_, alignment);
-  }
-
  private:
   static Elf_Ehdr MakeElfHeader(InstructionSet isa, const InstructionSetFeatures* features) {
     Elf_Ehdr elf_header = Elf_Ehdr();
@@ -902,7 +877,6 @@
     elf_header.e_ehsize = sizeof(Elf_Ehdr);
     elf_header.e_phentsize = sizeof(Elf_Phdr);
     elf_header.e_shentsize = sizeof(Elf_Shdr);
-    elf_header.e_phoff = sizeof(Elf_Ehdr);
     return elf_header;
   }
 
@@ -933,6 +907,7 @@
     for (auto* section : sections_) {
       const Elf_Shdr& shdr = section->header_;
       if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        DCHECK(shdr.sh_addr != 0u) << "Allocate virtual memory for the section";
         // PT_LOAD tells the linker to mmap part of the file.
         // The linker can only mmap page-aligned sections.
         // Single PT_LOAD may contain several ELF sections.
@@ -1010,6 +985,7 @@
 
   // List of used section in the order in which they were written.
   std::vector<Section*> sections_;
+  Section* current_section_;  // The section which is currently being written.
 
   bool started_;
   bool write_program_headers_;
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
index 33e6b5a..659f1dc 100644
--- a/compiler/linker/error_delaying_output_stream.h
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -19,7 +19,9 @@
 
 #include "output_stream.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
index 0ac1490..6f4e774 100644
--- a/compiler/linker/linker_patch.h
+++ b/compiler/linker/linker_patch.h
@@ -20,8 +20,9 @@
 #include <iosfwd>
 #include <stdint.h>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "method_reference.h"
 
 namespace art {
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
index ad29840..f93ea7a 100644
--- a/compiler/linker/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -17,7 +17,9 @@
 #include "file_output_stream.h"
 #include "vector_output_stream.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 6297dd0..9e9d14a 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -52,9 +52,6 @@
                 /* compiled_classes */ nullptr,
                 /* compiled_methods */ nullptr,
                 /* thread_count */ 1u,
-                /* dump_stats */ false,
-                /* dump_passes */ false,
-                /* timer */ nullptr,
                 /* swap_fd */ -1,
                 /* profile_compilation_info */ nullptr),
         error_msg_(),
diff --git a/compiler/linker/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
index 75f90e5..f2cae5b 100644
--- a/compiler/linker/vector_output_stream.cc
+++ b/compiler/linker/vector_output_stream.cc
@@ -16,7 +16,7 @@
 
 #include "vector_output_stream.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 namespace linker {
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index a6687fe..58f591b 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -16,6 +16,7 @@
 
 #include "block_builder.h"
 
+#include "base/logging.h"  // For VLOG.
 #include "bytecode_utils.h"
 #include "quicken_info.h"
 
@@ -58,6 +59,7 @@
         // cannot have any code afterwards.
       } else {
         // The TryItem spans beyond the end of the CodeItem. This is invalid code.
+        VLOG(compiler) << "Not compiled: TryItem spans beyond the end of the CodeItem";
         return false;
       }
     }
@@ -110,6 +112,7 @@
       if (next == instructions.end()) {
         // In the normal case we should never hit this but someone can artificially forge a dex
         // file to fall-through out the method code. In this case we bail out compilation.
+        VLOG(compiler) << "Not compiled: Fall-through beyond the CodeItem";
         return false;
       }
       MaybeCreateBlockAt(next.DexPc());
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0bd3ce9..45eec6d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -981,21 +981,6 @@
     }
   }
 
-  uint32_t outer_dex_pc = dex_pc;
-  uint32_t outer_environment_size = 0;
-  uint32_t inlining_depth = 0;
-  if (instruction != nullptr) {
-    for (HEnvironment* environment = instruction->GetEnvironment();
-         environment != nullptr;
-         environment = environment->GetParent()) {
-      outer_dex_pc = environment->GetDexPc();
-      outer_environment_size = environment->Size();
-      if (environment != instruction->GetEnvironment()) {
-        inlining_depth++;
-      }
-    }
-  }
-
   // Collect PC infos for the mapping table.
   uint32_t native_pc = GetAssembler()->CodePosition();
 
@@ -1003,12 +988,12 @@
   if (instruction == nullptr) {
     // For stack overflow checks and native-debug-info entries without dex register
     // mapping (i.e. start of basic block or start of slow path).
-    stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0);
+    stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0);
     stack_map_stream->EndStackMapEntry();
     return;
   }
-  LocationSummary* locations = instruction->GetLocations();
 
+  LocationSummary* locations = instruction->GetLocations();
   uint32_t register_mask = locations->GetRegisterMask();
   DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u);
   if (locations->OnlyCallsOnSlowPath()) {
@@ -1023,22 +1008,33 @@
     // The register mask must be a subset of callee-save registers.
     DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
   }
+
+  uint32_t outer_dex_pc = dex_pc;
+  uint32_t outer_environment_size = 0u;
+  uint32_t inlining_depth = 0;
+  HEnvironment* const environment = instruction->GetEnvironment();
+  if (environment != nullptr) {
+    HEnvironment* outer_environment = environment;
+    while (outer_environment->GetParent() != nullptr) {
+      outer_environment = outer_environment->GetParent();
+      ++inlining_depth;
+    }
+    outer_dex_pc = outer_environment->GetDexPc();
+    outer_environment_size = outer_environment->Size();
+  }
   stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
                                        outer_environment_size,
                                        inlining_depth);
-
-  HEnvironment* const environment = instruction->GetEnvironment();
   EmitEnvironment(environment, slow_path);
   // Record invoke info, the common case for the trampoline is super and static invokes. Only
   // record these to reduce oat file size.
   if (kEnableDexLayoutOptimizations) {
-    if (environment != nullptr &&
-        instruction->IsInvoke() &&
-        instruction->IsInvokeStaticOrDirect()) {
-      HInvoke* const invoke = instruction->AsInvoke();
+    if (instruction->IsInvokeStaticOrDirect()) {
+      HInvoke* const invoke = instruction->AsInvokeStaticOrDirect();
+      DCHECK(environment != nullptr);
       stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex());
     }
   }
@@ -1411,10 +1407,10 @@
 
 void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) {
   if (compiler_options_.GetImplicitNullChecks()) {
-    MaybeRecordStat(stats_, kImplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, MethodCompilationStat::kImplicitNullCheckGenerated);
     GenerateImplicitNullCheck(instruction);
   } else {
-    MaybeRecordStat(stats_, kExplicitNullCheckGenerated);
+    MaybeRecordStat(stats_, MethodCompilationStat::kExplicitNullCheckGenerated);
     GenerateExplicitNullCheck(instruction);
   }
 }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d6922d2..1f6b214 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1941,6 +1941,7 @@
   DCHECK_EQ(instruction->InputCount(), 2U);
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   DataType::Type type = instruction->GetResultType();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
   switch (type) {
     case DataType::Type::kInt32: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -1950,11 +1951,22 @@
         int32_t imm = CodeGenerator::GetInt32ValueOf(right->AsConstant());
         if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
           can_use_imm = IsUint<16>(imm);
-        } else if (instruction->IsAdd()) {
-          can_use_imm = IsInt<16>(imm);
         } else {
-          DCHECK(instruction->IsSub());
-          can_use_imm = IsInt<16>(-imm);
+          DCHECK(instruction->IsSub() || instruction->IsAdd());
+          if (instruction->IsSub()) {
+            imm = -imm;
+          }
+          if (isR6) {
+            bool single_use = right->GetUses().HasExactlyOneElement();
+            int16_t imm_high = High16Bits(imm);
+            int16_t imm_low = Low16Bits(imm);
+            if (imm_low < 0) {
+              imm_high += 1;
+            }
+            can_use_imm = !((imm_high != 0) && (imm_low != 0)) || single_use;
+          } else {
+            can_use_imm = IsInt<16>(imm);
+          }
         }
       }
       if (can_use_imm)
@@ -1988,6 +2000,7 @@
 void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) {
   DataType::Type type = instruction->GetType();
   LocationSummary* locations = instruction->GetLocations();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   switch (type) {
     case DataType::Type::kInt32: {
@@ -2019,17 +2032,32 @@
           __ Xori(dst, lhs, rhs_imm);
         else
           __ Xor(dst, lhs, rhs_reg);
-      } else if (instruction->IsAdd()) {
-        if (use_imm)
-          __ Addiu(dst, lhs, rhs_imm);
-        else
-          __ Addu(dst, lhs, rhs_reg);
       } else {
-        DCHECK(instruction->IsSub());
-        if (use_imm)
-          __ Addiu(dst, lhs, -rhs_imm);
-        else
+        DCHECK(instruction->IsAdd() || instruction->IsSub());
+        if (use_imm) {
+          if (instruction->IsSub()) {
+            rhs_imm = -rhs_imm;
+          }
+          if (IsInt<16>(rhs_imm)) {
+            __ Addiu(dst, lhs, rhs_imm);
+          } else {
+            DCHECK(isR6);
+            int16_t rhs_imm_high = High16Bits(rhs_imm);
+            int16_t rhs_imm_low = Low16Bits(rhs_imm);
+            if (rhs_imm_low < 0) {
+              rhs_imm_high += 1;
+            }
+            __ Aui(dst, lhs, rhs_imm_high);
+            if (rhs_imm_low != 0) {
+              __ Addiu(dst, dst, rhs_imm_low);
+            }
+          }
+        } else if (instruction->IsAdd()) {
+          __ Addu(dst, lhs, rhs_reg);
+        } else {
+          DCHECK(instruction->IsSub());
           __ Subu(dst, lhs, rhs_reg);
+        }
       }
       break;
     }
@@ -3103,23 +3131,92 @@
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+
+  bool const_index = false;
+  bool const_length = false;
+
+  if (index->IsConstant()) {
+    if (length->IsConstant()) {
+      const_index = true;
+      const_length = true;
+    } else {
+      int32_t index_value = index->AsIntConstant()->GetValue();
+      if (index_value < 0 || IsInt<16>(index_value + 1)) {
+        const_index = true;
+      }
+    }
+  } else if (length->IsConstant()) {
+    int32_t length_value = length->AsIntConstant()->GetValue();
+    if (IsUint<15>(length_value)) {
+      const_length = true;
+    }
+  }
+
+  locations->SetInAt(0, const_index
+      ? Location::ConstantLocation(index->AsConstant())
+      : Location::RequiresRegister());
+  locations->SetInAt(1, const_length
+      ? Location::ConstantLocation(length->AsConstant())
+      : Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathMIPS* slow_path =
-      new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction);
-  codegen_->AddSlowPath(slow_path);
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  Register index = locations->InAt(0).AsRegister<Register>();
-  Register length = locations->InAt(1).AsRegister<Register>();
+  if (length_loc.IsConstant()) {
+    int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue();
+    if (index_loc.IsConstant()) {
+      int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+      if (index < 0 || index >= length) {
+        BoundsCheckSlowPathMIPS* slow_path =
+            new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ B(slow_path->GetEntryLabel());
+      } else {
+        // Nothing to be done.
+      }
+      return;
+    }
 
-  // length is limited by the maximum positive signed 32-bit integer.
-  // Unsigned comparison of length and index checks for index < 0
-  // and for length <= index simultaneously.
-  __ Bgeu(index, length, slow_path->GetEntryLabel());
+    BoundsCheckSlowPathMIPS* slow_path =
+        new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction);
+    codegen_->AddSlowPath(slow_path);
+    Register index = index_loc.AsRegister<Register>();
+    if (length == 0) {
+      __ B(slow_path->GetEntryLabel());
+    } else if (length == 1) {
+      __ Bnez(index, slow_path->GetEntryLabel());
+    } else {
+      DCHECK(IsUint<15>(length)) << length;
+      __ Sltiu(TMP, index, length);
+      __ Beqz(TMP, slow_path->GetEntryLabel());
+    }
+  } else {
+    Register length = length_loc.AsRegister<Register>();
+    BoundsCheckSlowPathMIPS* slow_path =
+        new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction);
+    codegen_->AddSlowPath(slow_path);
+    if (index_loc.IsConstant()) {
+      int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+      if (index < 0) {
+        __ B(slow_path->GetEntryLabel());
+      } else if (index == 0) {
+        __ Blez(length, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(IsInt<16>(index + 1)) << index;
+        __ Sltiu(TMP, length, index + 1);
+        __ Bnez(TMP, slow_path->GetEntryLabel());
+      }
+    } else {
+      Register index = index_loc.AsRegister<Register>();
+      __ Bgeu(index, length, slow_path->GetEntryLabel());
+    }
+  }
 }
 
 // Temp is used for read barrier.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index ee33b3f..22989c8 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1793,11 +1793,19 @@
         int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant());
         if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
           can_use_imm = IsUint<16>(imm);
-        } else if (instruction->IsAdd()) {
-          can_use_imm = IsInt<16>(imm);
         } else {
-          DCHECK(instruction->IsSub());
-          can_use_imm = IsInt<16>(-imm);
+          DCHECK(instruction->IsAdd() || instruction->IsSub());
+          bool single_use = right->GetUses().HasExactlyOneElement();
+          if (instruction->IsSub()) {
+            if (!(type == DataType::Type::kInt32 && imm == INT32_MIN)) {
+              imm = -imm;
+            }
+          }
+          if (type == DataType::Type::kInt32) {
+            can_use_imm = IsInt<16>(imm) || (Low16Bits(imm) == 0) || single_use;
+          } else {
+            can_use_imm = IsInt<16>(imm) || (IsInt<32>(imm) && (Low16Bits(imm) == 0)) || single_use;
+          }
         }
       }
       if (can_use_imm)
@@ -1855,30 +1863,90 @@
           __ Xori(dst, lhs, rhs_imm);
         else
           __ Xor(dst, lhs, rhs_reg);
-      } else if (instruction->IsAdd()) {
-        if (type == DataType::Type::kInt32) {
-          if (use_imm)
-            __ Addiu(dst, lhs, rhs_imm);
-          else
-            __ Addu(dst, lhs, rhs_reg);
-        } else {
-          if (use_imm)
-            __ Daddiu(dst, lhs, rhs_imm);
-          else
-            __ Daddu(dst, lhs, rhs_reg);
+      } else if (instruction->IsAdd() || instruction->IsSub()) {
+        if (instruction->IsSub()) {
+          rhs_imm = -rhs_imm;
         }
-      } else {
-        DCHECK(instruction->IsSub());
         if (type == DataType::Type::kInt32) {
-          if (use_imm)
-            __ Addiu(dst, lhs, -rhs_imm);
-          else
-            __ Subu(dst, lhs, rhs_reg);
+          if (use_imm) {
+            if (IsInt<16>(rhs_imm)) {
+              __ Addiu(dst, lhs, rhs_imm);
+            } else {
+              int16_t rhs_imm_high = High16Bits(rhs_imm);
+              int16_t rhs_imm_low = Low16Bits(rhs_imm);
+              if (rhs_imm_low < 0) {
+                rhs_imm_high += 1;
+              }
+              __ Aui(dst, lhs, rhs_imm_high);
+              if (rhs_imm_low != 0) {
+                __ Addiu(dst, dst, rhs_imm_low);
+              }
+            }
+          } else {
+            if (instruction->IsAdd()) {
+              __ Addu(dst, lhs, rhs_reg);
+            } else {
+              DCHECK(instruction->IsSub());
+              __ Subu(dst, lhs, rhs_reg);
+            }
+          }
         } else {
-          if (use_imm)
-            __ Daddiu(dst, lhs, -rhs_imm);
-          else
+          if (use_imm) {
+            if (IsInt<16>(rhs_imm)) {
+              __ Daddiu(dst, lhs, rhs_imm);
+            } else if (IsInt<32>(rhs_imm)) {
+              int16_t rhs_imm_high = High16Bits(rhs_imm);
+              int16_t rhs_imm_low = Low16Bits(rhs_imm);
+              bool overflow_hi16 = false;
+              if (rhs_imm_low < 0) {
+                rhs_imm_high += 1;
+                overflow_hi16 = (rhs_imm_high == -32768);
+              }
+              __ Daui(dst, lhs, rhs_imm_high);
+              if (rhs_imm_low != 0) {
+                __ Daddiu(dst, dst, rhs_imm_low);
+              }
+              if (overflow_hi16) {
+                __ Dahi(dst, 1);
+              }
+            } else {
+              int16_t rhs_imm_low = Low16Bits(Low32Bits(rhs_imm));
+              if (rhs_imm_low < 0) {
+                rhs_imm += (INT64_C(1) << 16);
+              }
+              int16_t rhs_imm_upper = High16Bits(Low32Bits(rhs_imm));
+              if (rhs_imm_upper < 0) {
+                rhs_imm += (INT64_C(1) << 32);
+              }
+              int16_t rhs_imm_high = Low16Bits(High32Bits(rhs_imm));
+              if (rhs_imm_high < 0) {
+                rhs_imm += (INT64_C(1) << 48);
+              }
+              int16_t rhs_imm_top = High16Bits(High32Bits(rhs_imm));
+              GpuRegister tmp = lhs;
+              if (rhs_imm_low != 0) {
+                __ Daddiu(dst, tmp, rhs_imm_low);
+                tmp = dst;
+              }
+              // Dahi and Dati must use the same input and output register, so we have to initialize
+              // the dst register using Daddiu or Daui, even when the intermediate value is zero:
+              // Daui(dst, lhs, 0).
+              if ((rhs_imm_upper != 0) || (rhs_imm_low == 0)) {
+                __ Daui(dst, tmp, rhs_imm_upper);
+              }
+              if (rhs_imm_high != 0) {
+                __ Dahi(dst, rhs_imm_high);
+              }
+              if (rhs_imm_top != 0) {
+                __ Dati(dst, rhs_imm_top);
+              }
+            }
+          } else if (instruction->IsAdd()) {
+            __ Daddu(dst, lhs, rhs_reg);
+          } else {
+            DCHECK(instruction->IsSub());
             __ Dsubu(dst, lhs, rhs_reg);
+          }
         }
       }
       break;
@@ -2614,23 +2682,92 @@
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+
+  bool const_index = false;
+  bool const_length = false;
+
+  if (index->IsConstant()) {
+    if (length->IsConstant()) {
+      const_index = true;
+      const_length = true;
+    } else {
+      int32_t index_value = index->AsIntConstant()->GetValue();
+      if (index_value < 0 || IsInt<16>(index_value + 1)) {
+        const_index = true;
+      }
+    }
+  } else if (length->IsConstant()) {
+    int32_t length_value = length->AsIntConstant()->GetValue();
+    if (IsUint<15>(length_value)) {
+      const_length = true;
+    }
+  }
+
+  locations->SetInAt(0, const_index
+      ? Location::ConstantLocation(index->AsConstant())
+      : Location::RequiresRegister());
+  locations->SetInAt(1, const_length
+      ? Location::ConstantLocation(length->AsConstant())
+      : Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathMIPS64* slow_path =
-      new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction);
-  codegen_->AddSlowPath(slow_path);
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>();
-  GpuRegister length = locations->InAt(1).AsRegister<GpuRegister>();
+  if (length_loc.IsConstant()) {
+    int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue();
+    if (index_loc.IsConstant()) {
+      int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+      if (index < 0 || index >= length) {
+        BoundsCheckSlowPathMIPS64* slow_path =
+            new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ Bc(slow_path->GetEntryLabel());
+      } else {
+        // Nothing to be done.
+      }
+      return;
+    }
 
-  // length is limited by the maximum positive signed 32-bit integer.
-  // Unsigned comparison of length and index checks for index < 0
-  // and for length <= index simultaneously.
-  __ Bgeuc(index, length, slow_path->GetEntryLabel());
+    BoundsCheckSlowPathMIPS64* slow_path =
+        new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction);
+    codegen_->AddSlowPath(slow_path);
+    GpuRegister index = index_loc.AsRegister<GpuRegister>();
+    if (length == 0) {
+      __ Bc(slow_path->GetEntryLabel());
+    } else if (length == 1) {
+      __ Bnezc(index, slow_path->GetEntryLabel());
+    } else {
+      DCHECK(IsUint<15>(length)) << length;
+      __ Sltiu(TMP, index, length);
+      __ Beqzc(TMP, slow_path->GetEntryLabel());
+    }
+  } else {
+    GpuRegister length = length_loc.AsRegister<GpuRegister>();
+    BoundsCheckSlowPathMIPS64* slow_path =
+        new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction);
+    codegen_->AddSlowPath(slow_path);
+    if (index_loc.IsConstant()) {
+      int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue();
+      if (index < 0) {
+        __ Bc(slow_path->GetEntryLabel());
+      } else if (index == 0) {
+        __ Blezc(length, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(IsInt<16>(index + 1)) << index;
+        __ Sltiu(TMP, length, index + 1);
+        __ Bnezc(TMP, slow_path->GetEntryLabel());
+      }
+    } else {
+      GpuRegister index = index_loc.AsRegister<GpuRegister>();
+      __ Bgeuc(index, length, slow_path->GetEntryLabel());
+    }
+  }
 }
 
 // Temp is used for read barrier.
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 96fe2a1..dd47a1f 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -15,9 +15,10 @@
  */
 
 #include "code_generator_utils.h"
-#include "nodes.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "nodes.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2e8170e..42ee9db 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5732,24 +5732,18 @@
   return codegen_->GetAssembler();
 }
 
-void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
+void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
   ScratchRegisterScope ensure_scratch(
       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
-}
 
-void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
-  ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
-  __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
-  __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+  // Now that the temp register is available (possibly spilled), move the memory word by word.
+  for (int i = 0; i < number_of_words; i++) {
+    __ movl(temp_reg, Address(ESP, src + stack_offset));
+    __ movl(Address(ESP, dst + stack_offset), temp_reg);
+    stack_offset += kX86WordSize;
+  }
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -5800,7 +5794,7 @@
       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsStackSlot());
-      MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
     }
   } else if (source.IsDoubleStackSlot()) {
     if (destination.IsRegisterPair()) {
@@ -5811,11 +5805,15 @@
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
-      MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
     }
   } else if (source.IsSIMDStackSlot()) {
-    DCHECK(destination.IsFpuRegister());
-    __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+    if (destination.IsFpuRegister()) {
+      __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
+    }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5915,7 +5913,16 @@
   __ movd(reg, temp_reg);
 }
 
-void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
+void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
+  size_t extra_slot = 4 * kX86WordSize;
+  __ subl(ESP, Immediate(extra_slot));
+  __ movups(Address(ESP, 0), XmmRegister(reg));
+  ExchangeMemory(0, mem + extra_slot, 4);
+  __ movups(XmmRegister(reg), Address(ESP, 0));
+  __ addl(ESP, Immediate(extra_slot));
+}
+
+void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
   ScratchRegisterScope ensure_scratch1(
       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
 
@@ -5925,10 +5932,15 @@
 
   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
-  __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
-  __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
-  __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
-  __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+
+  // Now that temp registers are available (possibly spilled), exchange blocks of memory.
+  for (int i = 0; i < number_of_words; i++) {
+    __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
+    __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
+    __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
+    __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+    stack_offset += kX86WordSize;
+  }
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -5947,7 +5959,7 @@
   } else if (source.IsStackSlot() && destination.IsRegister()) {
     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
     // Use XOR Swap algorithm to avoid a temporary.
     DCHECK_NE(source.reg(), destination.reg());
@@ -5983,8 +5995,13 @@
     // Move the high double to the low double.
     __ psrldq(reg, Immediate(8));
   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
-    Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize));
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
+  } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
+  } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
+    Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
+  } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
+    Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
   }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 176e4df..40b7e3c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -139,10 +139,10 @@
 
  private:
   void Exchange(Register reg, int mem);
-  void Exchange(int mem1, int mem2);
   void Exchange32(XmmRegister reg, int mem);
-  void MoveMemoryToMemory32(int dst, int src);
-  void MoveMemoryToMemory64(int dst, int src);
+  void Exchange128(XmmRegister reg, int mem);
+  void ExchangeMemory(int mem1, int mem2, int number_of_words);
+  void MoveMemoryToMemory(int dst, int src, int number_of_words);
 
   CodeGeneratorX86* const codegen_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e25688c..02fbf23 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5220,9 +5220,17 @@
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
   } else if (source.IsSIMDStackSlot()) {
-    DCHECK(destination.IsFpuRegister());
-    __ movups(destination.AsFpuRegister<XmmRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+    if (destination.IsFpuRegister()) {
+      __ movups(destination.AsFpuRegister<XmmRegister>(),
+                Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      size_t high = kX86_64WordSize;
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
+    }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5290,19 +5298,6 @@
   __ movl(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
-  ScratchRegisterScope ensure_scratch(
-      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
-
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
-  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
-  __ movl(CpuRegister(ensure_scratch.GetRegister()),
-          Address(CpuRegister(RSP), mem2 + stack_offset));
-  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
-  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
-          CpuRegister(ensure_scratch.GetRegister()));
-}
-
 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
   __ movq(CpuRegister(TMP), reg1);
   __ movq(reg1, reg2);
@@ -5315,19 +5310,6 @@
   __ movq(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
-  ScratchRegisterScope ensure_scratch(
-      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
-
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
-  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
-  __ movq(CpuRegister(ensure_scratch.GetRegister()),
-          Address(CpuRegister(RSP), mem2 + stack_offset));
-  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
-  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
-          CpuRegister(ensure_scratch.GetRegister()));
-}
-
 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movss(Address(CpuRegister(RSP), mem), reg);
@@ -5340,6 +5322,48 @@
   __ movd(reg, CpuRegister(TMP));
 }
 
+void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
+  size_t extra_slot = 2 * kX86_64WordSize;
+  __ subq(CpuRegister(RSP), Immediate(extra_slot));
+  __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
+  ExchangeMemory64(0, mem + extra_slot, 2);
+  __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
+  __ addq(CpuRegister(RSP), Immediate(extra_slot));
+}
+
+void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movl(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+
+  // Now that temp registers are available (possibly spilled), exchange blocks of memory.
+  for (int i = 0; i < num_of_qwords; i++) {
+    __ movq(CpuRegister(TMP),
+            Address(CpuRegister(RSP), mem1 + stack_offset));
+    __ movq(CpuRegister(ensure_scratch.GetRegister()),
+            Address(CpuRegister(RSP), mem2 + stack_offset));
+    __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
+            CpuRegister(TMP));
+    __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+            CpuRegister(ensure_scratch.GetRegister()));
+    stack_offset += kX86_64WordSize;
+  }
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
@@ -5352,13 +5376,13 @@
   } else if (source.IsStackSlot() && destination.IsRegister()) {
     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
-    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
@@ -5371,6 +5395,12 @@
     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
+  } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
+    ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
+  } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
+    Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
+  } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
+    Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 00c5c27..e86123e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -139,11 +139,12 @@
  private:
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
-  void Exchange32(int mem1, int mem2);
   void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
-  void Exchange64(int mem1, int mem2);
+  void Exchange128(XmmRegister reg, int mem);
+  void ExchangeMemory32(int mem1, int mem2);
+  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);
 
   CodeGeneratorX86_64* const codegen_;
 
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index bb586bf..6f11e62 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -113,7 +113,7 @@
 void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) {
   // Constant folding: replace `TypeConversion(a)' with a constant at
   // compile time if `a' is a constant.
-  HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation();
+  HConstant* constant = inst->TryStaticEvaluation();
   if (constant != nullptr) {
     inst->ReplaceWith(constant);
     inst->GetBlock()->RemoveInstruction(inst);
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index 75a7fbe..548fe28 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -19,7 +19,8 @@
 
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
 
 namespace art {
@@ -186,6 +187,7 @@
   }
 
   static bool IsTypeConversionImplicit(Type input_type, Type result_type);
+  static bool IsTypeConversionImplicit(int64_t value, Type result_type);
 
   static const char* PrettyDescriptor(Type type);
 
@@ -213,6 +215,18 @@
           MaxValueOfIntegralType(input_type) <= MaxValueOfIntegralType(result_type));
 }
 
+inline bool DataType::IsTypeConversionImplicit(int64_t value, Type result_type) {
+  if (IsIntegralType(result_type) && result_type != Type::kInt64) {
+    // If the constant value falls in the range of the result_type, type
+    // conversion isn't needed.
+    return value >= MinValueOfIntegralType(result_type) &&
+           value <= MaxValueOfIntegralType(result_type);
+  }
+  // Conversion isn't implicit if it's into a non-integer type, or into a 64-bit
+  // int, which may have a different number of registers.
+  return false;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_DATA_TYPE_H_
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3f4a3d8..8750910 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -294,7 +294,7 @@
     // as there may be different class loaders. So only return the index if it's
     // the right class already resolved with the class loader.
     if (index.IsValid()) {
-      ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType(
+      ObjPtr<mirror::Class> resolved = compilation_unit.GetClassLinker()->LookupResolvedType(
           index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
       if (resolved != cls) {
         index = dex::TypeIndex::Invalid();
@@ -441,9 +441,9 @@
         // Add dependency due to devirtulization. We've assumed resolved_method
         // has single implementation.
         outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
-        MaybeRecordStat(stats_, kCHAInline);
+        MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline);
       } else {
-        MaybeRecordStat(stats_, kInlinedInvokeVirtualOrInterface);
+        MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
       }
     }
     return result;
@@ -533,7 +533,7 @@
     }
 
     case kInlineCacheMonomorphic: {
-      MaybeRecordStat(stats_, kMonomorphicCall);
+      MaybeRecordStat(stats_, MethodCompilationStat::kMonomorphicCall);
       if (UseOnlyPolymorphicInliningWithNoDeopt()) {
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
@@ -542,7 +542,7 @@
     }
 
     case kInlineCachePolymorphic: {
-      MaybeRecordStat(stats_, kPolymorphicCall);
+      MaybeRecordStat(stats_, MethodCompilationStat::kPolymorphicCall);
       return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
     }
 
@@ -551,7 +551,7 @@
           << "Interface or virtual call to "
           << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
           << " is megamorphic and not inlined";
-      MaybeRecordStat(stats_, kMegamorphicCall);
+      MaybeRecordStat(stats_, MethodCompilationStat::kMegamorphicCall);
       return false;
     }
 
@@ -682,7 +682,7 @@
             << "is invalid in location" << dex_cache->GetDexFile()->GetLocation();
       return kInlineCacheNoData;
     }
-    ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
+    ObjPtr<mirror::Class> clazz = caller_compilation_unit_.GetClassLinker()->LookupResolvedType(
           class_ref.type_index,
           dex_cache,
           caller_compilation_unit_.GetClassLoader().Get());
@@ -755,7 +755,7 @@
   dex::TypeIndex class_index = FindClassIndexIn(
       GetMonomorphicType(classes), caller_compilation_unit_);
   if (!class_index.IsValid()) {
-    LOG_FAIL(stats_, kNotInlinedDexCache)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache)
         << "Call to " << ArtMethod::PrettyMethod(resolved_method)
         << " from inline cache is not inlined because its class is not"
         << " accessible to the caller";
@@ -804,7 +804,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(stats_, kInlinedMonomorphicCall);
+  MaybeRecordStat(stats_, MethodCompilationStat::kInlinedMonomorphicCall);
   return true;
 }
 
@@ -876,9 +876,9 @@
       load_class, codegen_, compiler_driver_, caller_compilation_unit_);
   DCHECK(kind != HLoadClass::LoadKind::kInvalid)
       << "We should always be able to reference a class for inline caches";
-  // Insert before setting the kind, as setting the kind affects the inputs.
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  // Load kind must be set before inserting the instruction into the graph.
   load_class->SetLoadKind(kind);
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   // In AOT mode, we will most likely load the class from BSS, which will involve a call
   // to the runtime. In this case, the load instruction will need an environment so copy
   // it from the invoke instruction.
@@ -994,7 +994,7 @@
     return false;
   }
 
-  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall);
 
   // Run type propagation to get the guards typed.
   ReferenceTypePropagation rtp_fixup(graph_,
@@ -1200,7 +1200,7 @@
                                      /* is_first_run */ false);
   rtp_fixup.Run();
 
-  MaybeRecordStat(stats_, kInlinedPolymorphicCall);
+  MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall);
 
   LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
@@ -1211,11 +1211,49 @@
                                    ReferenceTypeInfo receiver_type,
                                    bool do_rtp,
                                    bool cha_devirtualize) {
+  DCHECK(!invoke_instruction->IsIntrinsic());
   HInstruction* return_replacement = nullptr;
   uint32_t dex_pc = invoke_instruction->GetDexPc();
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-  if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
+  bool should_remove_invoke_instruction = false;
+
+  // If invoke_instruction is devirtualized to a different method, give intrinsics
+  // another chance before we try to inline it.
+  bool wrong_invoke_type = false;
+  if (invoke_instruction->GetResolvedMethod() != method &&
+      IntrinsicsRecognizer::Recognize(invoke_instruction, method, &wrong_invoke_type)) {
+    MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized);
+    if (invoke_instruction->IsInvokeInterface()) {
+      // We don't intrinsify an invoke-interface directly.
+      // Replace the invoke-interface with an invoke-virtual.
+      HInvokeVirtual* new_invoke = new (graph_->GetAllocator()) HInvokeVirtual(
+          graph_->GetAllocator(),
+          invoke_instruction->GetNumberOfArguments(),
+          invoke_instruction->GetType(),
+          invoke_instruction->GetDexPc(),
+          invoke_instruction->GetDexMethodIndex(),  // Use interface method's dex method index.
+          method,
+          method->GetMethodIndex());
+      HInputsRef inputs = invoke_instruction->GetInputs();
+      for (size_t index = 0; index != inputs.size(); ++index) {
+        new_invoke->SetArgumentAt(index, inputs[index]);
+      }
+      invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction);
+      new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+      if (invoke_instruction->GetType() == DataType::Type::kReference) {
+        new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
+      }
+      // Run intrinsic recognizer again to set new_invoke's intrinsic.
+      IntrinsicsRecognizer::Recognize(new_invoke, method, &wrong_invoke_type);
+      DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone);
+      return_replacement = new_invoke;
+      // invoke_instruction is replaced with new_invoke.
+      should_remove_invoke_instruction = true;
+    } else {
+      // invoke_instruction is intrinsified and stays.
+    }
+  } else if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
     if (invoke_instruction->IsInvokeInterface()) {
       DCHECK(!method->IsProxyMethod());
       // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
@@ -1258,19 +1296,27 @@
         new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
       }
       return_replacement = new_invoke;
+      // invoke_instruction is replaced with new_invoke.
+      should_remove_invoke_instruction = true;
     } else {
       // TODO: Consider sharpening an invoke virtual once it is not dependent on the
       // compiler driver.
       return false;
     }
+  } else {
+    // invoke_instruction is inlined.
+    should_remove_invoke_instruction = true;
   }
+
   if (cha_devirtualize) {
     AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
   }
   if (return_replacement != nullptr) {
     invoke_instruction->ReplaceWith(return_replacement);
   }
-  invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  if (should_remove_invoke_instruction) {
+    invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+  }
   FixUpReturnReferenceType(method, return_replacement);
   if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) {
     // Actual return value has a more specific type than the method's declared
@@ -1301,14 +1347,14 @@
                                  ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    LOG_FAIL(stats_, kNotInlinedProxy)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedProxy)
         << "Method " << method->PrettyMethod()
         << " is not inlined because of unimplemented inline support for proxy methods.";
     return false;
   }
 
   if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
-    LOG_FAIL(stats_, kNotInlinedRecursiveBudget)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget)
         << "Method "
         << method->PrettyMethod()
         << " is not inlined because it has reached its recursive call budget.";
@@ -1322,10 +1368,10 @@
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
       LOG_SUCCESS() << "Successfully replaced pattern of invoke "
                     << method->PrettyMethod();
-      MaybeRecordStat(stats_, kReplacedInvokeWithSimplePattern);
+      MaybeRecordStat(stats_, MethodCompilationStat::kReplacedInvokeWithSimplePattern);
       return true;
     }
-    LOG_FAIL(stats_, kNotInlinedWont)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedWont)
         << "Won't inline " << method->PrettyMethod() << " in "
         << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
         << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
@@ -1345,7 +1391,7 @@
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    LOG_FAIL(stats_, kNotInlinedCodeItem)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
         << "Method " << method->PrettyMethod()
         << " is not inlined because its code item is too big: "
         << code_item->insns_size_in_code_units_
@@ -1355,13 +1401,13 @@
   }
 
   if (code_item->tries_size_ != 0) {
-    LOG_FAIL(stats_, kNotInlinedTryCatch)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch)
         << "Method " << method->PrettyMethod() << " is not inlined because of try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    LOG_FAIL(stats_, kNotInlinedNotVerified)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified)
         << "Method " << method->PrettyMethod()
         << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
@@ -1371,7 +1417,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      LOG_FAIL(stats_, kNotInlinedNotVerified)
+      LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified)
           << "Method " << method->PrettyMethod()
           << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -1382,9 +1428,10 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    LOG_FAIL(stats_, kNotInlinedDexCache) << "Method " << method->PrettyMethod()
-             << " is not inlined because it is static and requires a clinit"
-             << " check that cannot be emitted due to Dex cache limitations";
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache)
+        << "Method " << method->PrettyMethod()
+        << " is not inlined because it is static and requires a clinit"
+        << " check that cannot be emitted due to Dex cache limitations";
     return false;
   }
 
@@ -1394,7 +1441,7 @@
   }
 
   LOG_SUCCESS() << method->PrettyMethod();
-  MaybeRecordStat(stats_, kInlinedInvoke);
+  MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvoke);
   return true;
 }
 
@@ -1677,7 +1724,7 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    LOG_FAIL(stats_, kNotInlinedCannotBuild)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCannotBuild)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be built, so cannot be inlined";
     return false;
@@ -1685,7 +1732,7 @@
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    LOG_FAIL(stats_, kNotInlinedRegisterAllocator)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " cannot be inlined because of the register allocator";
     return false;
@@ -1738,7 +1785,7 @@
 
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it has an infinite loop";
     return false;
@@ -1749,14 +1796,14 @@
     if (predecessor->GetLastInstruction()->IsThrow()) {
       if (invoke_instruction->GetBlock()->IsTryBlock()) {
         // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
-        LOG_FAIL(stats_, kNotInlinedTryCatch)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller is in a try/catch block";
         return false;
       } else if (graph_->GetExitBlock() == nullptr) {
         // TODO(ngeoffray): Support adding HExit in the caller graph.
-        LOG_FAIL(stats_, kNotInlinedInfiniteLoop)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because one branch always throws and"
             << " caller does not have an exit block";
@@ -1775,7 +1822,7 @@
   }
 
   if (!has_one_return) {
-    LOG_FAIL(stats_, kNotInlinedAlwaysThrows)
+    LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows)
         << "Method " << callee_dex_file.PrettyMethod(method_index)
         << " could not be inlined because it always throws";
     return false;
@@ -1788,7 +1835,7 @@
       if (block->GetLoopInformation()->IsIrreducible()) {
         // Don't inline methods with irreducible loops, they could prevent some
         // optimizations to run.
-        LOG_FAIL(stats_, kNotInlinedIrreducibleLoop)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains an irreducible loop";
         return false;
@@ -1797,7 +1844,7 @@
         // Don't inline methods with loops without exit, since they cause the
         // loop information to be computed incorrectly when updating after
         // inlining.
-        LOG_FAIL(stats_, kNotInlinedLoopWithoutExit)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedLoopWithoutExit)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it contains a loop with no exit";
         return false;
@@ -1808,7 +1855,7 @@
          !instr_it.Done();
          instr_it.Advance()) {
       if (++number_of_instructions >= inlining_budget_) {
-        LOG_FAIL(stats_, kNotInlinedInstructionBudget)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInstructionBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because the outer method has reached"
             << " its instruction budget limit.";
@@ -1817,7 +1864,7 @@
       HInstruction* current = instr_it.Current();
       if (current->NeedsEnvironment() &&
           (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
-        LOG_FAIL(stats_, kNotInlinedEnvironmentBudget)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEnvironmentBudget)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " is not inlined because its caller has reached"
             << " its environment budget limit.";
@@ -1827,7 +1874,7 @@
       if (current->NeedsEnvironment() &&
           !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
                                             resolved_method)) {
-        LOG_FAIL(stats_, kNotInlinedStackMaps)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedStackMaps)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " needs an environment, is in a different dex file"
@@ -1836,7 +1883,7 @@
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        LOG_FAIL(stats_, kNotInlinedDexCache)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because " << current->DebugName()
             << " it is in a different dex file and requires access to the dex cache";
@@ -1848,7 +1895,7 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        LOG_FAIL(stats_, kNotInlinedUnresolvedEntrypoint)
+        LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint)
             << "Method " << callee_dex_file.PrettyMethod(method_index)
             << " could not be inlined because it is using an unresolved"
             << " entrypoint";
@@ -1885,7 +1932,7 @@
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
   HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
-  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
+  HSharpening sharpening(callee_graph, codegen_, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978d0c2..e36d91f 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -442,17 +442,15 @@
       return false;
     }
   };
-  const uint32_t num_instructions = code_item_->insns_size_in_code_units_;
+  CodeItemDebugInfoAccessor accessor(dex_file_, code_item_);
   ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_,
-                                                     num_instructions,
+                                                     accessor.InsnsSizeInCodeUnits(),
                                                      /* expandable */ false,
                                                      kArenaAllocGraphBuilder);
   locations->ClearAllBits();
-  uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*dex_file_, code_item_);
-  dex_file_->DecodeDebugPositionInfo(code_item_, debug_info_offset, Callback::Position, locations);
+  dex_file_->DecodeDebugPositionInfo(accessor.DebugInfoOffset(), Callback::Position, locations);
   // Instruction-specific tweaks.
-  IterationRange<DexInstructionIterator> instructions = code_item_->Instructions();
-  for (const DexInstructionPcPair& inst : instructions) {
+  for (const DexInstructionPcPair& inst : accessor) {
     switch (inst->Opcode()) {
       case Instruction::MOVE_EXCEPTION: {
         // Stop in native debugger after the exception has been moved.
@@ -461,7 +459,7 @@
         locations->ClearBit(inst.DexPc());
         DexInstructionIterator next = std::next(DexInstructionIterator(inst));
         DCHECK(next.DexPc() != inst.DexPc());
-        if (next != instructions.end()) {
+        if (next != accessor.end()) {
           locations->SetBit(next.DexPc());
         }
         break;
@@ -796,7 +794,6 @@
 
   ArtMethod* resolved_method =
       class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
-          *dex_compilation_unit_->GetDexFile(),
           method_idx,
           dex_compilation_unit_->GetDexCache(),
           class_loader,
@@ -831,7 +828,6 @@
       return nullptr;
     }
     ObjPtr<mirror::Class> referenced_class = class_linker->LookupResolvedType(
-        *dex_compilation_unit_->GetDexFile(),
         dex_compilation_unit_->GetDexFile()->GetMethodId(method_idx).class_idx_,
         dex_compilation_unit_->GetDexCache().Get(),
         class_loader.Get());
@@ -1128,7 +1124,7 @@
       MethodCompilationStat::kConstructorFenceGeneratedNew);
 }
 
-static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
 }
@@ -1351,6 +1347,8 @@
   uint16_t field_index;
   if (instruction.IsQuickened()) {
     if (!CanDecodeQuickenedInfo()) {
+      VLOG(compiler) << "Not compiled: Could not decode quickened instruction "
+                     << instruction.Opcode();
       return false;
     }
     field_index = LookupQuickenedInfo(quicken_index);
@@ -1422,8 +1420,8 @@
   return true;
 }
 
-static mirror::Class* GetClassFrom(CompilerDriver* driver,
-                                   const DexCompilationUnit& compilation_unit) {
+static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver,
+                                          const DexCompilationUnit& compilation_unit) {
   ScopedObjectAccess soa(Thread::Current());
   Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader();
   Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
@@ -1431,11 +1429,11 @@
   return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
 }
 
-mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const {
+ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const {
   return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
 }
 
-mirror::Class* HInstructionBuilder::GetCompilingClass() const {
+ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const {
   return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
 }
 
@@ -1482,12 +1480,10 @@
   Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
   Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
 
-  ArtField* resolved_field = class_linker->ResolveField(*dex_compilation_unit_->GetDexFile(),
-                                                        field_idx,
+  ArtField* resolved_field = class_linker->ResolveField(field_idx,
                                                         dex_compilation_unit_->GetDexCache(),
                                                         class_loader,
                                                         is_static);
-
   if (UNLIKELY(resolved_field == nullptr)) {
     // Clean up any exception left by type resolution.
     soa.Self()->ClearException();
@@ -1523,7 +1519,7 @@
   return resolved_field;
 }
 
-bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
+void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                                  uint32_t dex_pc,
                                                  bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_21c();
@@ -1537,7 +1533,7 @@
                     MethodCompilationStat::kUnresolvedField);
     DataType::Type field_type = GetFieldAccessType(*dex_file_, field_index);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
+    return;
   }
 
   DataType::Type field_type = GetFieldAccessType(*dex_file_, field_index);
@@ -1555,7 +1551,7 @@
     MaybeRecordStat(compilation_stats_,
                     MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
     BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
-    return true;
+    return;
   }
 
   HInstruction* cls = constant;
@@ -1591,7 +1587,6 @@
                                                        dex_pc));
     UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
   }
-  return true;
 }
 
 void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg,
@@ -1799,6 +1794,17 @@
   }
 }
 
+void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
+  HLoadString* load_string =
+      new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
+  HSharpening::ProcessLoadString(load_string,
+                                 code_generator_,
+                                 compiler_driver_,
+                                 *dex_compilation_unit_,
+                                 handles_);
+  AppendInstruction(load_string);
+}
+
 HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
@@ -1811,7 +1817,7 @@
     if (klass->IsPublic()) {
       needs_access_check = false;
     } else {
-      mirror::Class* compiling_class = GetCompilingClass();
+      ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
       if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
         needs_access_check = false;
       }
@@ -1856,9 +1862,9 @@
     // We actually cannot reference this class, we're forced to bail.
     return nullptr;
   }
-  // Append the instruction first, as setting the load kind affects the inputs.
-  AppendInstruction(load_class);
+  // Load kind must be set before inserting the instruction into the graph.
   load_class->SetLoadKind(load_kind);
+  AppendInstruction(load_class);
   return load_class;
 }
 
@@ -2058,6 +2064,8 @@
       uint16_t method_idx;
       if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) {
         if (!CanDecodeQuickenedInfo()) {
+          VLOG(compiler) << "Not compiled: Could not decode quickened instruction "
+                         << instruction.Opcode();
           return false;
         }
         method_idx = LookupQuickenedInfo(quicken_index);
@@ -2083,6 +2091,8 @@
       uint16_t method_idx;
       if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) {
         if (!CanDecodeQuickenedInfo()) {
+          VLOG(compiler) << "Not compiled: Could not decode quickened instruction "
+                         << instruction.Opcode();
           return false;
         }
         method_idx = LookupQuickenedInfo(quicken_index);
@@ -2758,7 +2768,7 @@
     case Instruction::IGET_CHAR_QUICK:
     case Instruction::IGET_SHORT:
     case Instruction::IGET_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, false, quicken_index)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ false, quicken_index)) {
         return false;
       }
       break;
@@ -2778,7 +2788,7 @@
     case Instruction::IPUT_CHAR_QUICK:
     case Instruction::IPUT_SHORT:
     case Instruction::IPUT_SHORT_QUICK: {
-      if (!BuildInstanceFieldAccess(instruction, dex_pc, true, quicken_index)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ true, quicken_index)) {
         return false;
       }
       break;
@@ -2791,9 +2801,7 @@
     case Instruction::SGET_BYTE:
     case Instruction::SGET_CHAR:
     case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
-        return false;
-      }
+      BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ false);
       break;
     }
 
@@ -2804,9 +2812,7 @@
     case Instruction::SPUT_BYTE:
     case Instruction::SPUT_CHAR:
     case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
-        return false;
-      }
+      BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ true);
       break;
     }
 
@@ -2837,20 +2843,14 @@
 
     case Instruction::CONST_STRING: {
       dex::StringIndex string_index(instruction.VRegB_21c());
-      AppendInstruction(new (allocator_) HLoadString(graph_->GetCurrentMethod(),
-                                                     string_index,
-                                                     *dex_file_,
-                                                     dex_pc));
+      BuildLoadString(string_index, dex_pc);
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
       dex::StringIndex string_index(instruction.VRegB_31c());
-      AppendInstruction(new (allocator_) HLoadString(graph_->GetCurrentMethod(),
-                                                     string_index,
-                                                     *dex_file_,
-                                                     dex_pc));
+      BuildLoadString(string_index, dex_pc);
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
       break;
     }
@@ -2930,7 +2930,7 @@
 ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType(
     dex::TypeIndex type_index,
     const DexCompilationUnit& compilation_unit) const {
-  return ClassLinker::LookupResolvedType(
+  return compilation_unit.GetClassLinker()->LookupResolvedType(
         type_index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get());
 }
 
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index f551ac4..0500d40 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -175,8 +175,8 @@
                                         uint32_t dex_pc,
                                         bool is_put,
                                         DataType::Type field_type);
-  // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+  // Builds a static field access node.
+  void BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
   void BuildArrayAccess(const Instruction& instruction,
                         uint32_t dex_pc,
@@ -240,9 +240,10 @@
   // Builds an instruction sequence for a switch statement.
   void BuildSwitch(const Instruction& instruction, uint32_t dex_pc);
 
-  // Builds a `HLoadClass` loading the given `type_index`. If `outer` is true,
-  // this method will use the outer class's dex file to lookup the type at
-  // `type_index`.
+  // Builds a `HLoadString` loading the given `string_index`.
+  void BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc);
+
+  // Builds a `HLoadClass` loading the given `type_index`.
   HLoadClass* BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc);
 
   HLoadClass* BuildLoadClass(dex::TypeIndex type_index,
@@ -253,10 +254,10 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the outer-most compiling method's class.
-  mirror::Class* GetOutermostCompilingClass() const;
+  ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
 
   // Returns the class whose method is being compiled.
-  mirror::Class* GetCompilingClass() const;
+  ObjPtr<mirror::Class> GetCompilingClass() const;
 
   // Returns whether `type_index` points to the outer-most compiling method's class.
   bool IsOutermostCompilingClass(dex::TypeIndex type_index) const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 4c18e16..a42a85d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -48,7 +48,7 @@
   void RecordSimplification() {
     simplification_occurred_ = true;
     simplifications_at_current_position_++;
-    MaybeRecordStat(stats_, kInstructionSimplifications);
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplifications);
   }
 
   bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
@@ -663,7 +663,7 @@
 
   HGraph* graph = GetGraph();
   if (object->IsNullConstant()) {
-    MaybeRecordStat(stats_, kRemovedInstanceOf);
+    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
     instruction->ReplaceWith(graph->GetIntConstant(0));
     instruction->GetBlock()->RemoveInstruction(instruction);
     RecordSimplification();
@@ -674,7 +674,7 @@
   // the return value check with the `outcome` check, b/27651442 .
   bool outcome = false;
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
-    MaybeRecordStat(stats_, kRemovedInstanceOf);
+    MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
       HNotEqual* test = new (graph->GetAllocator()) HNotEqual(graph->GetNullConstant(), object);
@@ -1081,6 +1081,58 @@
   }
 }
 
+// Returns true if `type_conversion` is an integral (non-64-bit) conversion whose only uses
+// are as the value stored into a field/array element of the (no wider) result type.
+static bool IsTypeConversionForStoringIntoNoWiderFieldOnly(HTypeConversion* type_conversion) {
+  if (type_conversion->HasEnvironmentUses()) {
+    return false;
+  }
+  DataType::Type input_type = type_conversion->GetInputType();
+  DataType::Type result_type = type_conversion->GetResultType();
+  if (!DataType::IsIntegralType(input_type) ||
+      !DataType::IsIntegralType(result_type) ||
+      input_type == DataType::Type::kInt64 ||
+      result_type == DataType::Type::kInt64) {
+    // Type conversion is needed if non-integral types are involved, or if 64-bit
+    // types are involved, since those may use a different number of registers.
+    return false;
+  }
+  if (DataType::Size(input_type) >= DataType::Size(result_type)) {
+    // Type conversion is not necessary when storing to a field/element of the
+    // same or smaller size than the input; fall through to the use check below.
+  } else {
+    // Widening conversions are not handled here; keep the type conversion.
+    return false;
+  }
+
+  // Check that every use of the converted value is as the value of a heap store.
+  for (const HUseListNode<HInstruction*>& use : type_conversion->GetUses()) {
+    HInstruction* instruction = use.GetUser();
+    if (instruction->IsInstanceFieldSet() &&
+        instruction->AsInstanceFieldSet()->GetFieldType() == result_type) {
+      DCHECK_EQ(instruction->AsInstanceFieldSet()->GetValue(), type_conversion);
+      continue;
+    }
+    if (instruction->IsStaticFieldSet() &&
+        instruction->AsStaticFieldSet()->GetFieldType() == result_type) {
+      DCHECK_EQ(instruction->AsStaticFieldSet()->GetValue(), type_conversion);
+      continue;
+    }
+    if (instruction->IsArraySet() &&
+        instruction->AsArraySet()->GetComponentType() == result_type &&
+        // The conversion must be used as the stored value, not as the array index.
+        instruction->AsArraySet()->GetIndex() != type_conversion) {
+      DCHECK_EQ(instruction->AsArraySet()->GetValue(), type_conversion);
+      continue;
+    }
+    // The use is not as a store value, or the field/element type is not the
+    // same as `result_type`; keep the type conversion.
+    return false;
+  }
+  // Codegen automatically handles the type conversion during the store.
+  return true;
+}
+
 void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
   HInstruction* input = instruction->GetInput();
   DataType::Type input_type = input->GetType();
@@ -1169,6 +1221,13 @@
       return;
     }
   }
+
+  if (IsTypeConversionForStoringIntoNoWiderFieldOnly(instruction)) {
+    instruction->ReplaceWith(input);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    return;
+  }
 }
 
 void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
@@ -2035,7 +2094,9 @@
       optimizations.SetArgumentIsString();
     } else if (kUseReadBarrier) {
       DCHECK(instruction->GetResolvedMethod() != nullptr);
-      DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass());
+      DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass() ||
+             // Object.equals() can be devirtualized to String.equals().
+             instruction->GetResolvedMethod()->GetDeclaringClass()->IsObjectClass());
       Runtime* runtime = Runtime::Current();
       // For AOT, we always assume that the boot image shall contain the String.class and
       // we do not need a read barrier for boot image classes as they are non-moveable.
@@ -2266,7 +2327,7 @@
   HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length */ true);
   invoke->GetBlock()->InsertInstructionBefore(length, invoke);
   HBoundsCheck* bounds_check = new (allocator) HBoundsCheck(
-      index, length, dex_pc, invoke->GetDexMethodIndex());
+      index, length, dex_pc, /* is_string_char_at */ true);
   invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
   HArrayGet* array_get = new (allocator) HArrayGet(str,
                                                    bounds_check,
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index d41e49a..92081e3 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -37,9 +37,7 @@
 
  private:
   void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
   }
 
   bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 69e1463..1c44e5a 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -37,9 +37,7 @@
 
  private:
   void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
   }
 
   bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc
index 6a0d8a6..fa97401 100644
--- a/compiler/optimizing/instruction_simplifier_mips.cc
+++ b/compiler/optimizing/instruction_simplifier_mips.cc
@@ -33,9 +33,7 @@
 
  private:
   void RecordSimplification() {
-    if (stats_ != nullptr) {
-      stats_->RecordStat(kInstructionSimplificationsArch);
-    }
+    MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
   }
 
   bool TryExtractArrayAccessIndex(HInstruction* access,
diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h
index 22cc2ef..6cb8aff 100644
--- a/compiler/optimizing/instruction_simplifier_mips.h
+++ b/compiler/optimizing/instruction_simplifier_mips.h
@@ -30,7 +30,7 @@
 class InstructionSimplifierMips : public HOptimization {
  public:
   InstructionSimplifierMips(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "instruction_simplifier_mips", stats),
+      : HOptimization(graph, kInstructionSimplifierMipsPassName, stats),
         codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {}
 
   static constexpr const char* kInstructionSimplifierMipsPassName = "instruction_simplifier_mips";
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dfae534..6928b70 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -104,7 +104,8 @@
   return kCanThrow;
 }
 
-static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual.
   //
   // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization
@@ -130,18 +131,51 @@
       }
       if (invoke_type == kVirtual) {
         ArtMethod* art_method = invoke->GetResolvedMethod();
-        ScopedObjectAccess soa(Thread::Current());
         return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal());
       }
       return false;
 
     case kVirtual:
       // Call might be devirtualized.
-      return (invoke_type == kVirtual || invoke_type == kDirect);
+      return (invoke_type == kVirtual || invoke_type == kDirect || invoke_type == kInterface);
 
-    default:
+    case kSuper:
+    case kInterface:
+    case kPolymorphic:
       return false;
   }
+  LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type;
+  UNREACHABLE();
+}
+
+bool IntrinsicsRecognizer::Recognize(HInvoke* invoke,
+                                     ArtMethod* art_method,
+                                     /*out*/ bool* wrong_invoke_type) {
+  if (art_method == nullptr) {
+    art_method = invoke->GetResolvedMethod();
+  }
+  *wrong_invoke_type = false;
+  if (art_method == nullptr || !art_method->IsIntrinsic()) {
+    return false;
+  }
+
+  // TODO: b/65872996 The intent is that polymorphic signature methods should
+  // be compiler intrinsics. At present, they are only interpreter intrinsics.
+  if (art_method->IsPolymorphicSignature()) {
+    return false;
+  }
+
+  Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
+  if (CheckInvokeType(intrinsic, invoke) == false) {
+    *wrong_invoke_type = true;
+    return false;
+  }
+
+  invoke->SetIntrinsic(intrinsic,
+                       NeedsEnvironmentOrCache(intrinsic),
+                       GetSideEffects(intrinsic),
+                       GetExceptions(intrinsic));
+  return true;
 }
 
 void IntrinsicsRecognizer::Run() {
@@ -151,23 +185,14 @@
          inst_it.Advance()) {
       HInstruction* inst = inst_it.Current();
       if (inst->IsInvoke()) {
-        HInvoke* invoke = inst->AsInvoke();
-        ArtMethod* art_method = invoke->GetResolvedMethod();
-        if (art_method != nullptr && art_method->IsIntrinsic()) {
-          Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
-          if (!CheckInvokeType(intrinsic, invoke)) {
-            LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
-                << static_cast<uint32_t>(intrinsic) << " for "
-                << art_method->PrettyMethod()
-                << invoke->DebugName();
-          } else {
-            invoke->SetIntrinsic(intrinsic,
-                                 NeedsEnvironmentOrCache(intrinsic),
-                                 GetSideEffects(intrinsic),
-                                 GetExceptions(intrinsic));
-            MaybeRecordStat(stats_,
-                            MethodCompilationStat::kIntrinsicRecognized);
-          }
+        bool wrong_invoke_type = false;
+        if (Recognize(inst->AsInvoke(), /* art_method */ nullptr, &wrong_invoke_type)) {
+          MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized);
+        } else if (wrong_invoke_type) {
+          LOG(WARNING)
+              << "Found an intrinsic with unexpected invoke type: "
+              << inst->AsInvoke()->GetResolvedMethod()->PrettyMethod() << " "
+              << inst->DebugName();
         }
       }
     }
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 818d7f6..6299143 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -44,6 +44,12 @@
 
   void Run() OVERRIDE;
 
+  // Static helper that recognizes intrinsic call. Returns true on success.
+  // If it fails due to invoke type mismatch, wrong_invoke_type is set.
+  // Useful to recognize intrinsics on individual calls outside this full pass.
+  static bool Recognize(HInvoke* invoke, ArtMethod* method, /*out*/ bool* wrong_invoke_type)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition";
 
  private:
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 89ad85e..88326d3 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -74,25 +74,116 @@
     HGraphVisitor::VisitBasicBlock(block);
   }
 
+  HTypeConversion* AddTypeConversionIfNecessary(HInstruction* instruction,
+                                                HInstruction* value,
+                                                DataType::Type expected_type) {
+    HTypeConversion* type_conversion = nullptr;
+    // Should never add type conversion into boolean value.
+    if (expected_type != DataType::Type::kBool &&
+        !DataType::IsTypeConversionImplicit(value->GetType(), expected_type)) {
+      type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
+          expected_type, value, instruction->GetDexPc());
+      instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
+    }
+    return type_conversion;
+  }
+
+  // Find an instruction's substitute if it should be removed.
+  // Return the same instruction if it should not be removed.
+  HInstruction* FindSubstitute(HInstruction* instruction) {
+    size_t size = removed_loads_.size();
+    for (size_t i = 0; i < size; i++) {
+      if (removed_loads_[i] == instruction) {
+        return substitute_instructions_for_loads_[i];
+      }
+    }
+    return instruction;
+  }
+
+  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
+    DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
+        "Unexpected heap_value that has a substitute " << heap_value->DebugName();
+    removed_loads_.push_back(load);
+    substitute_instructions_for_loads_.push_back(heap_value);
+  }
+
+  // Scan the list of removed loads to see if we can reuse `type_conversion`, if
+  // the other removed load has the same substitute and type and is dominated
+  // by `type_conversion`.
+  void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) {
+    size_t size = removed_loads_.size();
+    HInstruction* load = removed_loads_[index];
+    HInstruction* substitute = substitute_instructions_for_loads_[index];
+    for (size_t j = index + 1; j < size; j++) {
+      HInstruction* load2 = removed_loads_[j];
+      HInstruction* substitute2 = substitute_instructions_for_loads_[j];
+      if (load2 == nullptr) {
+        DCHECK(substitute2->IsTypeConversion());
+        continue;
+      }
+      DCHECK(load2->IsInstanceFieldGet() ||
+             load2->IsStaticFieldGet() ||
+             load2->IsArrayGet());
+      DCHECK(substitute2 != nullptr);
+      if (substitute2 == substitute &&
+          load2->GetType() == load->GetType() &&
+          type_conversion->GetBlock()->Dominates(load2->GetBlock()) &&
+          // Don't share across irreducible loop headers.
+          // TODO: can be more fine-grained than this by testing each dominator.
+          (load2->GetBlock() == type_conversion->GetBlock() ||
+           !GetGraph()->HasIrreducibleLoops())) {
+        // The removed_loads_ are added in reverse post order.
+        DCHECK(type_conversion->StrictlyDominates(load2));
+        load2->ReplaceWith(type_conversion);
+        load2->GetBlock()->RemoveInstruction(load2);
+        removed_loads_[j] = nullptr;
+        substitute_instructions_for_loads_[j] = type_conversion;
+      }
+    }
+  }
+
   // Remove recorded instructions that should be eliminated.
   void RemoveInstructions() {
     size_t size = removed_loads_.size();
     DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
       HInstruction* load = removed_loads_[i];
-      DCHECK(load != nullptr);
+      if (load == nullptr) {
+        // The load has been handled in the scan for type conversion below.
+        DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion());
+        continue;
+      }
       DCHECK(load->IsInstanceFieldGet() ||
              load->IsStaticFieldGet() ||
              load->IsArrayGet());
       HInstruction* substitute = substitute_instructions_for_loads_[i];
       DCHECK(substitute != nullptr);
-      // Keep tracing substitute till one that's not removed.
-      HInstruction* sub_sub = FindSubstitute(substitute);
-      while (sub_sub != substitute) {
-        substitute = sub_sub;
-        sub_sub = FindSubstitute(substitute);
+      // We proactively retrieve the substitute for a removed load, so
+      // a load that has a substitute should not be observed as a heap
+      // location value.
+      DCHECK_EQ(FindSubstitute(substitute), substitute);
+
+      // The load expects to load the heap value as type load->GetType().
+      // However the tracked heap value may not be of that type. An explicit
+      // type conversion may be needed.
+      // There are actually three types involved here:
+      // (1) tracked heap value's type (type A)
+      // (2) heap location (field or element)'s type (type B)
+      // (3) load's type (type C)
+      // We guarantee that type A stored as type B and then fetched out as
+      // type C is the same as casting from type A to type C directly, since
+      // type B and type C will have the same size which is guaranteed in
+      // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType().
+      // So we only need one type conversion from type A to type C.
+      HTypeConversion* type_conversion = AddTypeConversionIfNecessary(
+          load, substitute, load->GetType());
+      if (type_conversion != nullptr) {
+        TryToReuseTypeConversion(type_conversion, i);
+        load->ReplaceWith(type_conversion);
+        substitute_instructions_for_loads_[i] = type_conversion;
+      } else {
+        load->ReplaceWith(substitute);
       }
-      load->ReplaceWith(substitute);
       load->GetBlock()->RemoveInstruction(load);
     }
 
@@ -328,8 +419,7 @@
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
       HInstruction* constant = GetDefaultValue(instruction->GetType());
-      removed_loads_.push_back(instruction);
-      substitute_instructions_for_loads_.push_back(constant);
+      AddRemovedLoad(instruction, constant);
       heap_values[idx] = constant;
       return;
     }
@@ -342,6 +432,8 @@
         DCHECK(ref_info->IsSingleton());
         // Get the real heap value of the store.
         heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
+        // heap_value may already have a substitute.
+        heap_value = FindSubstitute(heap_value);
       }
     }
     if (heap_value == kUnknownHeapValue) {
@@ -362,8 +454,7 @@
         }
         return;
       }
-      removed_loads_.push_back(instruction);
-      substitute_instructions_for_loads_.push_back(heap_value);
+      AddRemovedLoad(instruction, heap_value);
       TryRemovingNullCheck(instruction);
     }
   }
@@ -385,6 +476,8 @@
                         size_t vector_length,
                         int16_t declaring_class_def_index,
                         HInstruction* value) {
+    // value may already have a substitute.
+    value = FindSubstitute(value);
     HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
     size_t idx = heap_location_collector_.FindHeapLocationIndex(
@@ -679,18 +772,6 @@
     }
   }
 
-  // Find an instruction's substitute if it should be removed.
-  // Return the same instruction if it should not be removed.
-  HInstruction* FindSubstitute(HInstruction* instruction) {
-    size_t size = removed_loads_.size();
-    for (size_t i = 0; i < size; i++) {
-      if (removed_loads_[i] == instruction) {
-        return substitute_instructions_for_loads_[i];
-      }
-    }
-    return instruction;
-  }
-
   const HeapLocationCollector& heap_location_collector_;
   const SideEffectsAnalysis& side_effects_;
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1ca0960..3dc1ef7 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1749,7 +1749,8 @@
 HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
   if (instruction->IsPhi()) {
     HInstruction* input = instruction->InputAt(1);
-    if (input->IsVecOperation() && !input->IsVecExtractScalar()) {
+    if (HVecOperation::ReturnsSIMDValue(input)) {
+      DCHECK(!input->IsPhi());
       HVecOperation* input_vector = input->AsVecOperation();
       uint32_t vector_length = input_vector->GetVectorLength();
       DataType::Type type = input_vector->GetPackedType();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index fa580d9..5f33ed6 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -507,6 +507,7 @@
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
+        VLOG(compiler) << "Not compiled: Exceptional back edges";
         return kAnalysisFailThrowCatchLoop;
       }
       block->GetLoopInformation()->Populate();
@@ -1403,6 +1404,14 @@
   if (GetInput()->IsIntConstant()) {
     int32_t value = GetInput()->AsIntConstant()->GetValue();
     switch (GetResultType()) {
+      case DataType::Type::kInt8:
+        return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
+      case DataType::Type::kUint8:
+        return graph->GetIntConstant(static_cast<uint8_t>(value), GetDexPc());
+      case DataType::Type::kInt16:
+        return graph->GetIntConstant(static_cast<int16_t>(value), GetDexPc());
+      case DataType::Type::kUint16:
+        return graph->GetIntConstant(static_cast<uint16_t>(value), GetDexPc());
       case DataType::Type::kInt64:
         return graph->GetLongConstant(static_cast<int64_t>(value), GetDexPc());
       case DataType::Type::kFloat32:
@@ -1415,6 +1424,14 @@
   } else if (GetInput()->IsLongConstant()) {
     int64_t value = GetInput()->AsLongConstant()->GetValue();
     switch (GetResultType()) {
+      case DataType::Type::kInt8:
+        return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc());
+      case DataType::Type::kUint8:
+        return graph->GetIntConstant(static_cast<uint8_t>(value), GetDexPc());
+      case DataType::Type::kInt16:
+        return graph->GetIntConstant(static_cast<int16_t>(value), GetDexPc());
+      case DataType::Type::kUint16:
+        return graph->GetIntConstant(static_cast<uint16_t>(value), GetDexPc());
       case DataType::Type::kInt32:
         return graph->GetIntConstant(static_cast<int32_t>(value), GetDexPc());
       case DataType::Type::kFloat32:
@@ -2814,21 +2831,6 @@
   }
 }
 
-void HLoadClass::SetLoadKind(LoadKind load_kind) {
-  SetPackedField<LoadKindField>(load_kind);
-
-  if (load_kind != LoadKind::kRuntimeCall &&
-      load_kind != LoadKind::kReferrersClass) {
-    RemoveAsUserOfInput(0u);
-    SetRawInputAt(0u, nullptr);
-  }
-
-  if (!NeedsEnvironment()) {
-    RemoveEnvironment();
-    SetSideEffects(SideEffects::None());
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
   switch (rhs) {
     case HLoadClass::LoadKind::kReferrersClass:
@@ -2871,21 +2873,6 @@
   }
 }
 
-void HLoadString::SetLoadKind(LoadKind load_kind) {
-  // Once sharpened, the load kind should not be changed again.
-  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
-  SetPackedField<LoadKindField>(load_kind);
-
-  if (load_kind != LoadKind::kRuntimeCall) {
-    RemoveAsUserOfInput(0u);
-    SetRawInputAt(0u, nullptr);
-  }
-  if (!NeedsEnvironment()) {
-    RemoveEnvironment();
-    SetSideEffects(SideEffects::None());
-  }
-}
-
 std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
   switch (rhs) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 66d5bfe..42a9d95 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -4614,7 +4614,6 @@
   }
 
   uint32_t GetImtIndex() const { return imt_index_; }
-  uint32_t GetDexMethodIndex() const { return dex_method_index_; }
 
   DECLARE_INSTRUCTION(InvokeInterface);
 
@@ -5787,10 +5786,10 @@
   HBoundsCheck(HInstruction* index,
                HInstruction* length,
                uint32_t dex_pc,
-               bool string_char_at = false)
+               bool is_string_char_at = false)
       : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
     DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType()));
-    SetPackedFlag<kFlagIsStringCharAt>(string_char_at);
+    SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
     SetRawInputAt(0, index);
     SetRawInputAt(1, length);
   }
@@ -6062,6 +6061,20 @@
 std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
+inline void HLoadClass::SetLoadKind(LoadKind load_kind) {
+  // The load kind should be determined before inserting the instruction to the graph.
+  DCHECK(GetBlock() == nullptr);
+  DCHECK(GetEnvironment() == nullptr);
+  SetPackedField<LoadKindField>(load_kind);
+  if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) {
+    special_input_ = HUserRecord<HInstruction*>(nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+// Note: defined outside class to see operator<<(., HLoadClass::LoadKind).
 inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
@@ -6209,6 +6222,21 @@
 std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
+inline void HLoadString::SetLoadKind(LoadKind load_kind) {
+  // The load kind should be determined before inserting the instruction to the graph.
+  DCHECK(GetBlock() == nullptr);
+  DCHECK(GetEnvironment() == nullptr);
+  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
+  SetPackedField<LoadKindField>(load_kind);
+  if (load_kind != LoadKind::kRuntimeCall) {
+    special_input_ = HUserRecord<HInstruction*>(nullptr);
+  }
+  if (!NeedsEnvironment()) {
+    SetSideEffects(SideEffects::None());
+  }
+}
+
+// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 59d5b9f..87dff84 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -109,6 +109,16 @@
 
   // Assumes vector nodes cannot be moved by default. Each concrete implementation
   // that can be moved should override this method and return true.
+  //
+  // Note: similar approach is used for instruction scheduling (if it is turned on for the target):
+  // by default HScheduler::IsSchedulable returns false for a particular HVecOperation.
+  // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+  // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
+  //
+  // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be
+  // altered to return true if the instruction might reside outside the SIMD loop body since SIMD
+  // registers are not kept alive across vector loop boundaries (yet).
   bool CanBeMoved() const OVERRIDE { return false; }
 
   // Tests if all data of a vector node (vector length and packed type) is equal.
@@ -150,6 +160,19 @@
     }
   }
 
+  // Helper method to determine if an instruction returns a SIMD value.
+  // TODO: This method is needed until we introduce SIMD as proper type.
+  static bool ReturnsSIMDValue(HInstruction* instruction) {
+    if (instruction->IsVecOperation()) {
+      return !instruction->IsVecExtractScalar();  // only scalar returning vec op
+    } else if (instruction->IsPhi()) {
+      return
+          instruction->GetType() == kSIMDType &&
+          instruction->InputAt(1)->IsVecOperation();  // vectorizer does not go deeper
+    }
+    return false;
+  }
+
   DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
 
  protected:
@@ -879,7 +902,7 @@
                       vector_length,
                       dex_pc) {
     for (size_t i = 0; i < number_of_scalars; i++) {
-      DCHECK(!scalars[i]->IsVecOperation() || scalars[i]->IsVecExtractScalar());
+      DCHECK(!ReturnsSIMDValue(scalars[i]));
       SetRawInputAt(0, scalars[i]);
     }
   }
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 7edb642..7149d93 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -258,8 +258,7 @@
         break;
       }
       case OptimizationPass::kSharpening:
-        opt = new (allocator) HSharpening(
-            graph, codegen, dex_compilation_unit, driver, handles, name);
+        opt = new (allocator) HSharpening(graph, codegen, driver, name);
         break;
       case OptimizationPass::kSelectGenerator:
         opt = new (allocator) HSelectGenerator(graph, handles, stats, name);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 53f9ec4..24b1a12 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -112,7 +112,7 @@
                Mutex& dump_mutex)
       : graph_(graph),
         cached_method_name_(),
-        timing_logger_enabled_(compiler_driver->GetDumpPasses()),
+        timing_logger_enabled_(compiler_driver->GetCompilerOptions().GetDumpTimings()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetAllocator()),
         visualizer_oss_(),
@@ -407,7 +407,7 @@
         driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
   }
-  if (driver->GetDumpStats()) {
+  if (driver->GetCompilerOptions().GetDumpStats()) {
     compilation_stats_.reset(new OptimizingCompilerStats());
   }
 }
@@ -738,7 +738,7 @@
                                               ArtMethod* method,
                                               bool osr,
                                               VariableSizedHandleScope* handles) const {
-  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptCompilation);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
   const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
@@ -757,8 +757,7 @@
   }
 
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
-    MaybeRecordStat(compilation_stats_.get(),
-                    MethodCompilationStat::kNotCompiledPathological);
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
   }
 
@@ -768,8 +767,7 @@
   const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
   if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
       && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) {
-    MaybeRecordStat(compilation_stats_.get(),
-                    MethodCompilationStat::kNotCompiledSpaceFilter);
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSpaceFilter);
     return nullptr;
   }
 
@@ -800,8 +798,7 @@
                             compiler_driver->GetCompilerOptions(),
                             compilation_stats_.get()));
   if (codegen.get() == nullptr) {
-    MaybeRecordStat(compilation_stats_.get(),
-                    MethodCompilationStat::kNotCompiledNoCodegen);
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
@@ -873,6 +870,7 @@
   codegen->Compile(code_allocator);
   pass_observer.DumpDisassembly();
 
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode);
   return codegen.release();
 }
 
@@ -883,6 +881,7 @@
     const DexCompilationUnit& dex_compilation_unit,
     ArtMethod* method,
     VariableSizedHandleScope* handles) const {
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
   const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
@@ -894,8 +893,6 @@
 
   // Do not attempt to compile on architectures we do not support.
   if (!IsInstructionSetSupported(instruction_set)) {
-    MaybeRecordStat(compilation_stats_.get(),
-                    MethodCompilationStat::kNotCompiledUnsupportedIsa);
     return nullptr;
   }
 
@@ -920,8 +917,6 @@
                             compiler_driver->GetCompilerOptions(),
                             compilation_stats_.get()));
   if (codegen.get() == nullptr) {
-    MaybeRecordStat(compilation_stats_.get(),
-                    MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
   codegen->GetAssembler()->cfi().SetEnabled(
@@ -979,6 +974,7 @@
 
   VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic()
       << " " << graph->PrettyMethod();
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledIntrinsic);
   return codegen.release();
 }
 
@@ -1046,8 +1042,6 @@
       }
     }
     if (codegen.get() != nullptr) {
-      MaybeRecordStat(compilation_stats_.get(),
-                      MethodCompilationStat::kCompiled);
       compiled_method = Emit(&allocator,
                              &code_allocator,
                              codegen.get(),
@@ -1139,10 +1133,20 @@
     }
   }
 
-  return ArtQuickJniCompileMethod(GetCompilerDriver(),
-                                  access_flags,
-                                  method_idx,
-                                  dex_file);
+  JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
+      GetCompilerDriver(), access_flags, method_idx, dex_file);
+  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub);
+  return CompiledMethod::SwapAllocCompiledMethod(
+      GetCompilerDriver(),
+      jni_compiled_method.GetInstructionSet(),
+      jni_compiled_method.GetCode(),
+      jni_compiled_method.GetFrameSize(),
+      jni_compiled_method.GetCoreSpillMask(),
+      jni_compiled_method.GetFpSpillMask(),
+      /* method_info */ ArrayRef<const uint8_t>(),
+      /* vmap_table */ ArrayRef<const uint8_t>(),
+      jni_compiled_method.GetCfi(),
+      /* patches */ ArrayRef<const linker::LinkerPatch>());
 }
 
 Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
@@ -1192,7 +1196,72 @@
 
   Runtime* runtime = Runtime::Current();
   ArenaAllocator allocator(runtime->GetJitArenaPool());
-  ArenaStack arena_stack(Runtime::Current()->GetJitArenaPool());
+
+  if (UNLIKELY(method->IsNative())) {
+    JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
+        GetCompilerDriver(), access_flags, method_idx, *dex_file);
+    ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots;
+    ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list(
+        allocator.Adapter(kArenaAllocCHA));
+    const void* code = code_cache->CommitCode(
+        self,
+        method,
+        /* stack_map_data */ nullptr,
+        /* method_info_data */ nullptr,
+        /* roots_data */ nullptr,
+        jni_compiled_method.GetFrameSize(),
+        jni_compiled_method.GetCoreSpillMask(),
+        jni_compiled_method.GetFpSpillMask(),
+        jni_compiled_method.GetCode().data(),
+        jni_compiled_method.GetCode().size(),
+        /* data_size */ 0u,
+        osr,
+        roots,
+        /* has_should_deoptimize_flag */ false,
+        cha_single_implementation_list);
+    if (code == nullptr) {
+      return false;
+    }
+
+    const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+    if (compiler_options.GenerateAnyDebugInfo()) {
+      const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
+      const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
+      debug::MethodDebugInfo info = {};
+      DCHECK(info.trampoline_name.empty());
+      info.dex_file = dex_file;
+      info.class_def_index = class_def_idx;
+      info.dex_method_index = method_idx;
+      info.access_flags = access_flags;
+      info.code_item = code_item;
+      info.isa = jni_compiled_method.GetInstructionSet();
+      info.deduped = false;
+      info.is_native_debuggable = compiler_options.GetNativeDebuggable();
+      info.is_optimized = true;
+      info.is_code_address_text_relative = false;
+      info.code_address = code_address;
+      info.code_size = jni_compiled_method.GetCode().size();
+      info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+      info.code_info = nullptr;
+      info.cfi = jni_compiled_method.GetCfi();
+      // If both flags are passed, generate full debug info.
+      const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+      std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+          GetCompilerDriver()->GetInstructionSet(),
+          GetCompilerDriver()->GetInstructionSetFeatures(),
+          mini_debug_info,
+          info);
+      CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
+    }
+
+    Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
+    if (jit_logger != nullptr) {
+      jit_logger->WriteLog(code, jni_compiled_method.GetCode().size(), method);
+    }
+    return true;
+  }
+
+  ArenaStack arena_stack(runtime->GetJitArenaPool());
   CodeVectorAllocator code_allocator(&allocator);
   VariableSizedHandleScope handles(self);
 
@@ -1237,6 +1306,7 @@
           self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
   if (roots == nullptr) {
     // Out of memory, just clear the exception to avoid any Java exception uncaught problems.
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
     DCHECK(self->IsExceptionPending());
     self->ClearException();
     return false;
@@ -1253,9 +1323,9 @@
                                                &method_info_data,
                                                &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
     return false;
   }
-  MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
                           MemoryRegion(method_info_data, method_info_size),
                           code_item);
@@ -1279,12 +1349,13 @@
       codegen->GetGraph()->GetCHASingleImplementationList());
 
   if (code == nullptr) {
+    MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
     code_cache->ClearData(self, stack_map_data, roots_data);
     return false;
   }
 
   const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
-  if (compiler_options.GetGenerateDebugInfo()) {
+  if (compiler_options.GenerateAnyDebugInfo()) {
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
     debug::MethodDebugInfo info = {};
@@ -1304,10 +1375,13 @@
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+    // If both flags are passed, generate full debug info.
+    const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+    std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
         GetCompilerDriver()->GetInstructionSet(),
         GetCompilerDriver()->GetInstructionSetFeatures(),
-        ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+        mini_debug_info,
+        info);
     CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 07f9635..32a94ab 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -23,14 +23,18 @@
 #include <type_traits>
 
 #include "atomic.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "globals.h"
 
 namespace art {
 
-enum MethodCompilationStat {
-  kAttemptCompilation = 0,
+enum class MethodCompilationStat {
+  kAttemptBytecodeCompilation = 0,
+  kAttemptIntrinsicCompilation,
+  kCompiledNativeStub,
+  kCompiledIntrinsic,
+  kCompiledBytecode,
   kCHAInline,
-  kCompiled,
   kInlinedInvoke,
   kReplacedInvokeWithSimplePattern,
   kInstructionSimplifications,
@@ -94,8 +98,10 @@
   kConstructorFenceRemovedLSE,
   kConstructorFenceRemovedPFRA,
   kConstructorFenceRemovedCFRE,
+  kJitOutOfMemoryForCommit,
   kLastStat
 };
+std::ostream& operator<<(std::ostream& os, const MethodCompilationStat& rhs);
 
 class OptimizingCompilerStats {
  public:
@@ -105,7 +111,15 @@
   }
 
   void RecordStat(MethodCompilationStat stat, uint32_t count = 1) {
-    compile_stats_[stat] += count;
+    size_t stat_index = static_cast<size_t>(stat);
+    DCHECK_LT(stat_index, arraysize(compile_stats_));
+    compile_stats_[stat_index] += count;
+  }
+
+  uint32_t GetStat(MethodCompilationStat stat) const {
+    size_t stat_index = static_cast<size_t>(stat);
+    DCHECK_LT(stat_index, arraysize(compile_stats_));
+    return compile_stats_[stat_index];
   }
 
   void Log() const {
@@ -114,18 +128,29 @@
       return;
     }
 
-    if (compile_stats_[kAttemptCompilation] == 0) {
+    uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic);
+    uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub);
+    uint32_t bytecode_attempts =
+        GetStat(MethodCompilationStat::kAttemptBytecodeCompilation);
+    if (compiled_intrinsics == 0u && compiled_native_stubs == 0u && bytecode_attempts == 0u) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      float compiled_percent =
-          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
-      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
-          << " methods: " << std::fixed << std::setprecision(2)
-          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
+      uint32_t compiled_bytecode_methods =
+          GetStat(MethodCompilationStat::kCompiledBytecode);
+      // Successful intrinsic compilation preempts other compilation attempts but failed intrinsic
+      // compilation shall still count towards bytecode or native stub compilation attempts.
+      uint32_t num_compilation_attempts =
+          compiled_intrinsics + compiled_native_stubs + bytecode_attempts;
+      uint32_t num_successful_compilations =
+          compiled_intrinsics + compiled_native_stubs + compiled_bytecode_methods;
+      float compiled_percent = num_successful_compilations * 100.0f / num_compilation_attempts;
+      LOG(INFO) << "Attempted compilation of "
+          << num_compilation_attempts << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << num_successful_compilations << ") compiled.";
 
-      for (size_t i = 0; i < kLastStat; i++) {
+      for (size_t i = 0; i < arraysize(compile_stats_); ++i) {
         if (compile_stats_[i] != 0) {
-          LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": "
+          LOG(INFO) << "OptStat#" << static_cast<MethodCompilationStat>(i) << ": "
               << compile_stats_[i];
         }
       }
@@ -133,7 +158,7 @@
   }
 
   void AddTo(OptimizingCompilerStats* other_stats) {
-    for (size_t i = 0; i != kLastStat; ++i) {
+    for (size_t i = 0; i != arraysize(compile_stats_); ++i) {
       uint32_t count = compile_stats_[i];
       if (count != 0) {
         other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count);
@@ -142,91 +167,13 @@
   }
 
   void Reset() {
-    for (size_t i = 0; i != kLastStat; ++i) {
-      compile_stats_[i] = 0u;
+    for (std::atomic<uint32_t>& stat : compile_stats_) {
+      stat = 0u;
     }
   }
 
  private:
-  std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
-    std::string name;
-    switch (stat) {
-      case kAttemptCompilation : name = "AttemptCompilation"; break;
-      case kCHAInline : name = "CHAInline"; break;
-      case kCompiled : name = "Compiled"; break;
-      case kInlinedInvoke : name = "InlinedInvoke"; break;
-      case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break;
-      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
-      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
-      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
-      case kUnresolvedField : name = "UnresolvedField"; break;
-      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
-      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
-      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
-      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
-      case kNotCompiledSkipped: name = "NotCompiledSkipped"; break;
-      case kNotCompiledInvalidBytecode: name = "NotCompiledInvalidBytecode"; break;
-      case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
-      case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
-      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
-      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
-      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
-      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
-      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
-      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
-      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
-      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
-      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
-      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
-      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
-      case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
-      case kMonomorphicCall: name = "MonomorphicCall"; break;
-      case kPolymorphicCall: name = "PolymorphicCall"; break;
-      case kMegamorphicCall: name = "MegamorphicCall"; break;
-      case kBooleanSimplified : name = "BooleanSimplified"; break;
-      case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
-      case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
-      case kLoopVectorized : name = "LoopVectorized"; break;
-      case kLoopVectorizedIdiom : name = "LoopVectorizedIdiom"; break;
-      case kSelectGenerated : name = "SelectGenerated"; break;
-      case kRemovedInstanceOf: name = "RemovedInstanceOf"; break;
-      case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
-      case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
-      case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
-      case kSimplifyIf: name = "SimplifyIf"; break;
-      case kInstructionSunk: name = "InstructionSunk"; break;
-      case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break;
-      case kNotInlinedDexCache: name = "NotInlinedDexCache"; break;
-      case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break;
-      case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break;
-      case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break;
-      case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break;
-      case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break;
-      case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break;
-      case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break;
-      case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break;
-      case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break;
-      case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break;
-      case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break;
-      case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break;
-      case kNotInlinedWont: name = "NotInlinedWont"; break;
-      case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
-      case kNotInlinedProxy: name = "NotInlinedProxy"; break;
-      case kConstructorFenceGeneratedNew: name = "ConstructorFenceGeneratedNew"; break;
-      case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
-      case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
-      case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
-      case kConstructorFenceRemovedCFRE: name = "ConstructorFenceRemovedCFRE"; break;
-
-      case kLastStat:
-        LOG(FATAL) << "invalid stat "
-            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-        UNREACHABLE();
-    }
-    return "OptStat#" + name;
-  }
-
-  std::atomic<uint32_t> compile_stats_[kLastStat];
+  std::atomic<uint32_t> compile_stats_[static_cast<size_t>(MethodCompilationStat::kLastStat)];
 
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
 };
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 7246129..8bb124e 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -544,7 +544,7 @@
       // the method is from the String class, the null loader is good enough.
       Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
       ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-          dex_file, invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
+          invoke->GetDexMethodIndex(), dex_cache, loader, /* referrer */ nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
@@ -576,8 +576,8 @@
 
   ScopedObjectAccess soa(Thread::Current());
   ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_);
-  ObjPtr<mirror::Class> klass =
-      ClassLinker::LookupResolvedType(type_idx, dex_cache, class_loader_.Get());
+  ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
+      type_idx, dex_cache, class_loader_.Get());
   SetClassAsTypeInfo(instr, klass, is_exact);
 }
 
@@ -612,7 +612,7 @@
 
   // The field is unknown only during tests.
   if (info.GetField() != nullptr) {
-    klass = info.GetField()->LookupType();
+    klass = info.GetField()->LookupResolvedType();
   }
 
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index bb7c353..dfa077f 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -462,6 +462,11 @@
   // containing basic block from being scheduled.
   // This method is used to restrict scheduling to instructions that we know are
   // safe to handle.
+  //
+  // By default, HScheduler::IsSchedulable returns false for newly introduced instructions.
+  // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+  // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
   virtual bool IsSchedulable(const HInstruction* instruction) const;
   bool IsSchedulable(const HBasicBlock* block) const;
 
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 32f161f..f71cb5b 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -151,6 +151,20 @@
 #undef CASE_INSTRUCTION_KIND
   }
 
+  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
+  // loop boundaries. This works around the compiler's lack of a notion of SIMD registers: around
+  // a call we have to save/restore all live SIMD&FP registers (only the lower 64 bits of SIMD&FP
+  // registers are callee-saved), so don't reorder such vector instructions.
+  //
+  // TODO: Remove this when proper support for SIMD registers is introduced to the compiler.
+  bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
+    return HScheduler::IsSchedulingBarrier(instr) ||
+           instr->IsVecReduce() ||
+           instr->IsVecExtractScalar() ||
+           instr->IsVecSetScalars() ||
+           instr->IsVecReplicateScalar();
+  }
+
  private:
   SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index e46c9a7..1e49411 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -45,8 +45,6 @@
         SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
                                     codegen_,
                                     compiler_driver_);
-      } else if (instruction->IsLoadString()) {
-        ProcessLoadString(instruction->AsLoadString());
       }
       // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
       //       here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
@@ -147,10 +145,11 @@
   invoke->SetDispatchInfo(dispatch_info);
 }
 
-HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
-                                                       CodeGenerator* codegen,
-                                                       CompilerDriver* compiler_driver,
-                                                       const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
+    HLoadClass* load_class,
+    CodeGenerator* codegen,
+    CompilerDriver* compiler_driver,
+    const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
@@ -237,7 +236,12 @@
   return load_kind;
 }
 
-void HSharpening::ProcessLoadString(HLoadString* load_string) {
+void HSharpening::ProcessLoadString(
+    HLoadString* load_string,
+    CodeGenerator* codegen,
+    CompilerDriver* compiler_driver,
+    const DexCompilationUnit& dex_compilation_unit,
+    VariableSizedHandleScope* handles) {
   DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
 
   const DexFile& dex_file = load_string->GetDexFile();
@@ -249,27 +253,27 @@
     ClassLinker* class_linker = runtime->GetClassLinker();
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
-        ? compilation_unit_.GetDexCache()
+    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *dex_compilation_unit.GetDexFile())
+        ? dex_compilation_unit.GetDexCache()
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
-    mirror::String* string = nullptr;
+    ObjPtr<mirror::String> string = nullptr;
 
-    if (codegen_->GetCompilerOptions().IsBootImage()) {
+    if (codegen->GetCompilerOptions().IsBootImage()) {
       // Compiling boot image. Resolve the string and allocate it if needed, to ensure
       // the string will be added to the boot image.
       DCHECK(!runtime->UseJitCompilation());
-      string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+      string = class_linker->ResolveString(string_index, dex_cache);
       CHECK(string != nullptr);
-      if (compiler_driver_->GetSupportBootImageFixup()) {
-        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+      if (compiler_driver->GetSupportBootImageFixup()) {
+        DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
         desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // compiler_driver_test. Do not sharpen.
         desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else if (runtime->UseJitCompilation()) {
-      DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
+      DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
+      string = class_linker->LookupString(string_index, dex_cache.Get());
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -281,9 +285,9 @@
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
-      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
+      string = class_linker->LookupString(string_index, dex_cache.Get());
       if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
-        if (codegen_->GetCompilerOptions().GetCompilePic()) {
+        if (codegen->GetCompilerOptions().GetCompilePic()) {
           desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
         } else {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -293,12 +297,12 @@
       }
     }
     if (string != nullptr) {
-      load_string->SetString(handles_->NewHandle(string));
+      load_string->SetString(handles->NewHandle(string));
     }
   }
   DCHECK_NE(desired_load_kind, static_cast<HLoadString::LoadKind>(-1));
 
-  HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
+  HLoadString::LoadKind load_kind = codegen->GetSupportedLoadStringKind(desired_load_kind);
   load_string->SetLoadKind(load_kind);
 }
 
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index bb1954e..6df7d6d 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -34,26 +34,29 @@
  public:
   HSharpening(HGraph* graph,
               CodeGenerator* codegen,
-              const DexCompilationUnit& compilation_unit,
               CompilerDriver* compiler_driver,
-              VariableSizedHandleScope* handles,
               const char* name = kSharpeningPassName)
       : HOptimization(graph, name),
         codegen_(codegen),
-        compilation_unit_(compilation_unit),
-        compiler_driver_(compiler_driver),
-        handles_(handles) { }
+        compiler_driver_(compiler_driver) { }
 
   void Run() OVERRIDE;
 
   static constexpr const char* kSharpeningPassName = "sharpening";
 
+  // Used by the builder.
+  static void ProcessLoadString(HLoadString* load_string,
+                                CodeGenerator* codegen,
+                                CompilerDriver* compiler_driver,
+                                const DexCompilationUnit& dex_compilation_unit,
+                                VariableSizedHandleScope* handles);
+
   // Used by the builder and the inliner.
   static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
                                                    CodeGenerator* codegen,
                                                    CompilerDriver* compiler_driver,
                                                    const DexCompilationUnit& dex_compilation_unit)
-    REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Used by Sharpening and InstructionSimplifier.
   static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
@@ -61,12 +64,8 @@
                                           CompilerDriver* compiler_driver);
 
  private:
-  void ProcessLoadString(HLoadString* load_string);
-
   CodeGenerator* codegen_;
-  const DexCompilationUnit& compilation_unit_;
   CompilerDriver* compiler_driver_;
-  VariableSizedHandleScope* handles_;
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index e4edbfd..cb38476 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -328,6 +328,8 @@
       HInstruction* array = aget_int->GetArray();
       if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
         // RTP did not type the input array. Bail.
+        VLOG(compiler) << "Not compiled: Could not infer an array type for array operation at "
+                       << aget_int->GetDexPc();
         return false;
       }
 
@@ -368,6 +370,8 @@
       HInstruction* array = aset->GetArray();
       if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) {
         // RTP did not type the input array. Bail.
+        VLOG(compiler) << "Not compiled: Could not infer an array type for array operation at "
+                       << aset->GetDexPc();
         return false;
       }
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 9ab7a89..f6bd052 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -474,9 +474,10 @@
   // For a SIMD operation, compute the number of needed spill slots.
   // TODO: do through vector type?
   HInstruction* definition = GetParent()->GetDefinedBy();
-  if (definition != nullptr &&
-      definition->IsVecOperation() &&
-      !definition->IsVecExtractScalar()) {
+  if (definition != nullptr && HVecOperation::ReturnsSIMDValue(definition)) {
+    if (definition->IsPhi()) {
+      definition = definition->InputAt(1);  // SIMD always appears on back-edge
+    }
     return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
   }
   // Return number of needed spill slots based on type.
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 0e73e6b..1377e64 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_VIXL_H_
 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_VIXL_H_
 
+#include <android-base/logging.h>
+
 #include "base/arena_containers.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "constants_arm.h"
 #include "offsets.h"
 #include "utils/arm/assembler_arm_shared.h"
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index 5b87e3e..66252be 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -21,9 +21,10 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "arch/arm/registers_arm.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "globals.h"
 
 namespace art {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index c13c9af..4bc5d69 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -17,8 +17,10 @@
 #ifndef ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_VIXL_H_
 #define ART_COMPILER_UTILS_ARM_JNI_MACRO_ASSEMBLER_ARM_VIXL_H_
 
+#include <android-base/logging.h>
+
 #include "base/arena_containers.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "constants_arm.h"
 #include "offsets.h"
 #include "utils/arm/assembler_arm_shared.h"
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index 2be2d56..26f23b2 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -17,7 +17,8 @@
 #ifndef ART_COMPILER_UTILS_ARM_MANAGED_REGISTER_ARM_H_
 #define ART_COMPILER_UTILS_ARM_MANAGED_REGISTER_ARM_H_
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "constants_arm.h"
 #include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index bb98958..c83fd44 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -15,7 +15,6 @@
  */
 
 #include "assembler_arm64.h"
-#include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "heap_poisoning.h"
 #include "offsets.h"
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index e5ec24a..8983af2 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -21,8 +21,10 @@
 #include <memory>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/arena_containers.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "offsets.h"
 #include "utils/arm64/managed_register_arm64.h"
 #include "utils/assembler.h"
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 573bb6d..a5aa1c1 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -16,7 +16,6 @@
 
 #include "jni_macro_assembler_arm64.h"
 
-#include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "managed_register_arm64.h"
 #include "offsets.h"
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index ce39a13..f531b2a 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -21,10 +21,12 @@
 #include <memory>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "assembler_arm64.h"
 #include "base/arena_containers.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "offsets.h"
 #include "utils/assembler.h"
 #include "utils/jni_macro_assembler.h"
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index 7378a0a..9ce7ec9 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -17,8 +17,9 @@
 #ifndef ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 #define ART_COMPILER_UTILS_ARM64_MANAGED_REGISTER_ARM64_H_
 
+#include <android-base/logging.h>
+
 #include "arch/arm64/registers_arm64.h"
-#include "base/logging.h"
 #include "debug/dwarf/register.h"
 #include "utils/managed_register.h"
 
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index e0cef85..5b0cd6b 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -19,6 +19,8 @@
 
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
 #include "arm/constants_arm.h"
@@ -26,7 +28,6 @@
 #include "base/arena_object.h"
 #include "base/array_ref.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "debug/dwarf/debug_frame_opcode_writer.h"
 #include "label.h"
diff --git a/compiler/utils/intrusive_forward_list.h b/compiler/utils/intrusive_forward_list.h
index 5a358ac..ccdd32a 100644
--- a/compiler/utils/intrusive_forward_list.h
+++ b/compiler/utils/intrusive_forward_list.h
@@ -23,8 +23,9 @@
 #include <memory>
 #include <type_traits>
 
+#include <android-base/logging.h>
+
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 0fc1353..f5df926 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -19,12 +19,13 @@
 
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
 #include "base/arena_allocator.h"
 #include "base/arena_object.h"
 #include "base/array_ref.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "managed_register.h"
 #include "offsets.h"
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index b9d4e9c..3c91b2f 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -17,8 +17,8 @@
 #ifndef ART_COMPILER_UTILS_LABEL_H_
 #define ART_COMPILER_UTILS_LABEL_H_
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include <android-base/logging.h>
+#include <android-base/macros.h>
 
 namespace art {
 
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index eb75f8b..2218ef9 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -42,26 +42,13 @@
 
 MipsAssembler::DelaySlot::DelaySlot()
     : instruction_(0),
-      gpr_outs_mask_(0),
-      gpr_ins_mask_(0),
-      fpr_outs_mask_(0),
-      fpr_ins_mask_(0),
-      cc_outs_mask_(0),
-      cc_ins_mask_(0),
       patcher_label_(nullptr) {}
 
-void MipsAssembler::DsFsmInstr(uint32_t instruction,
-                               uint32_t gpr_outs_mask,
-                               uint32_t gpr_ins_mask,
-                               uint32_t fpr_outs_mask,
-                               uint32_t fpr_ins_mask,
-                               uint32_t cc_outs_mask,
-                               uint32_t cc_ins_mask,
-                               MipsLabel* patcher_label) {
+InOutRegMasks& MipsAssembler::DsFsmInstr(uint32_t instruction, MipsLabel* patcher_label) {
   if (!reordering_) {
     CHECK_EQ(ds_fsm_state_, kExpectingLabel);
     CHECK_EQ(delay_slot_.instruction_, 0u);
-    return;
+    return delay_slot_.masks_;
   }
   switch (ds_fsm_state_) {
     case kExpectingLabel:
@@ -92,13 +79,9 @@
       break;
   }
   delay_slot_.instruction_ = instruction;
-  delay_slot_.gpr_outs_mask_ = gpr_outs_mask & ~1u;  // Ignore register ZERO.
-  delay_slot_.gpr_ins_mask_ = gpr_ins_mask & ~1u;  // Ignore register ZERO.
-  delay_slot_.fpr_outs_mask_ = fpr_outs_mask;
-  delay_slot_.fpr_ins_mask_ = fpr_ins_mask;
-  delay_slot_.cc_outs_mask_ = cc_outs_mask;
-  delay_slot_.cc_ins_mask_ = cc_ins_mask;
+  delay_slot_.masks_ = InOutRegMasks();
   delay_slot_.patcher_label_ = patcher_label;
+  return delay_slot_.masks_;
 }
 
 void MipsAssembler::DsFsmLabel() {
@@ -167,73 +150,7 @@
 }
 
 void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) {
-  DsFsmInstr(0, 0, 0, 0, 0, 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrRrr(uint32_t instruction,
-                                  Register out,
-                                  Register in1,
-                                  Register in2,
-                                  MipsLabel* patcher_label) {
-  DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0, patcher_label);
-}
-
-void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction,
-                                   Register in1_out,
-                                   Register in2,
-                                   Register in3) {
-  DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0, 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrFff(uint32_t instruction,
-                                  FRegister out,
-                                  FRegister in1,
-                                  FRegister in2) {
-  DsFsmInstr(instruction, 0, 0, (1u << out), (1u << in1) | (1u << in2), 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrFfff(uint32_t instruction,
-                                   FRegister in1_out,
-                                   FRegister in2,
-                                   FRegister in3) {
-  DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrFffr(uint32_t instruction,
-                                   FRegister in1_out,
-                                   FRegister in2,
-                                   Register in3) {
-  DsFsmInstr(instruction, 0, (1u << in3), (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrRf(uint32_t instruction, Register out, FRegister in) {
-  DsFsmInstr(instruction, (1u << out), 0, 0, (1u << in), 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrFr(uint32_t instruction, FRegister out, Register in) {
-  DsFsmInstr(instruction, 0, (1u << in), (1u << out), 0, 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2) {
-  DsFsmInstr(instruction, 0, (1u << in2), 0, (1u << in1), 0, 0);
-}
-
-void MipsAssembler::DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2) {
-  DsFsmInstr(instruction, 0, 0, 0, (1u << in1) | (1u << in2), (1 << cc_out), 0);
-}
-
-void MipsAssembler::DsFsmInstrRrrc(uint32_t instruction,
-                                   Register in1_out,
-                                   Register in2,
-                                   int cc_in) {
-  DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0, 0, (1 << cc_in));
-}
-
-void MipsAssembler::DsFsmInstrFffc(uint32_t instruction,
-                                   FRegister in1_out,
-                                   FRegister in2,
-                                   int cc_in) {
-  DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, (1 << cc_in));
+  DsFsmInstr(0);
 }
 
 void MipsAssembler::FinalizeCode() {
@@ -535,14 +452,14 @@
 }
 
 void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x21)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
   if (patcher_label != nullptr) {
     Bind(patcher_label);
   }
-  DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs, patcher_label);
+  DsFsmInstr(EmitI(0x9, rs, rt, imm16), patcher_label).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
@@ -550,32 +467,32 @@
 }
 
 void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x23), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x23)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::MultR2(Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18), ZERO, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18)).GprIns(rs, rt);
 }
 
 void MipsAssembler::MultuR2(Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19), ZERO, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19)).GprIns(rs, rt);
 }
 
 void MipsAssembler::DivR2(Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a), ZERO, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a)).GprIns(rs, rt);
 }
 
 void MipsAssembler::DivuR2(Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b), ZERO, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b)).GprIns(rs, rt);
 }
 
 void MipsAssembler::MulR2(Register rd, Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0x1c, rs, rt, rd, 0, 2), rd, rs, rt);
+  DsFsmInstr(EmitR(0x1c, rs, rt, rd, 0, 2)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::DivR2(Register rd, Register rs, Register rt) {
@@ -604,179 +521,181 @@
 
 void MipsAssembler::MulR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x18), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 2, 0x18)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x18), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 3, 0x18)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x19), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 3, 0x19)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::DivR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1a), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 2, 0x1a)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::ModR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1a), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 3, 0x1a)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1b), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 2, 0x1b)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1b), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 3, 0x1b)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::And(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x24), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x24)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Andi(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xc, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0xc, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Or(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x25), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x25)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Ori(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xd, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0xd, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Xor(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x26), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x26)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Xori(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xe, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0xe, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Nor(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x27), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x27)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Movz(Register rd, Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0A), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x0A)).GprInOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Movn(Register rd, Register rs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0B), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x0B)).GprInOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x35), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x35)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Selnez(Register rd, Register rs, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x37), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x37)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::ClzR6(Register rd, Register rs) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10), rd, rs, rs);
+  DsFsmInstr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10)).GprOuts(rd).GprIns(rs);
 }
 
 void MipsAssembler::ClzR2(Register rd, Register rs) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x20), rd, rs, rs);
+  DsFsmInstr(EmitR(0x1C, rs, rd, rd, 0, 0x20)).GprOuts(rd).GprIns(rs);
 }
 
 void MipsAssembler::CloR6(Register rd, Register rs) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11), rd, rs, rs);
+  DsFsmInstr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11)).GprOuts(rd).GprIns(rs);
 }
 
 void MipsAssembler::CloR2(Register rd, Register rs) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x21), rd, rs, rs);
+  DsFsmInstr(EmitR(0x1C, rs, rd, rd, 0, 0x21)).GprOuts(rd).GprIns(rs);
 }
 
 void MipsAssembler::Seb(Register rd, Register rt) {
-  DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20), rd, rt, rt);
+  DsFsmInstr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Seh(Register rd, Register rt) {
-  DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20), rd, rt, rt);
+  DsFsmInstr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Wsbh(Register rd, Register rt) {
-  DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20), rd, rt, rt);
+  DsFsmInstr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Bitswap(Register rd, Register rt) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20), rd, rt, rt);
+  DsFsmInstr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
-  DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00), rd, rt, rt);
+  DsFsmInstr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
-  DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02), rd, rt, rt);
+  DsFsmInstr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
-  DsFsmInstrRrr(EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02), rd, rt, rt);
+  DsFsmInstr(EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
-  DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03), rd, rt, rt);
+  DsFsmInstr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03)).GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Sllv(Register rd, Register rt, Register rs) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x04), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x04)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Srlv(Register rd, Register rt, Register rs) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x06), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x06)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 1, 0x06), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 1, 0x06)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x07), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x07)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) {
   CHECK(IsUint<5>(pos)) << pos;
   CHECK(0 < size && size <= 32) << size;
   CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
-  DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00), rd, rt, rt);
+  DsFsmInstr(EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00))
+      .GprOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) {
   CHECK(IsUint<5>(pos)) << pos;
   CHECK(0 < size && size <= 32) << size;
   CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size;
-  DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt);
+  DsFsmInstr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04))
+      .GprInOuts(rd).GprIns(rt);
 }
 
 void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) {
   CHECK(IsR6() || HasMsa());
   CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne;
   int sa = saPlusOne - 1;
-  DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt);
+  DsFsmInstr(EmitR(0x0, rs, rt, rd, sa, 0x05)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::ShiftAndAdd(Register dst,
@@ -798,18 +717,18 @@
 }
 
 void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x20, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0x20, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0x21, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
   if (patcher_label != nullptr) {
     Bind(patcher_label);
   }
-  DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs, patcher_label);
+  DsFsmInstr(EmitI(0x23, rs, rt, imm16), patcher_label).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) {
@@ -818,20 +737,20 @@
 
 void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x22, rs, rt, imm16), rt, rt, rs);
+  DsFsmInstr(EmitI(0x22, rs, rt, imm16)).GprInOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x26, rs, rt, imm16), rt, rt, rs);
+  DsFsmInstr(EmitI(0x26, rs, rt, imm16)).GprInOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x24, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0x24, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x25, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0x25, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Lwpc(Register rs, uint32_t imm19) {
@@ -841,12 +760,12 @@
 }
 
 void MipsAssembler::Lui(Register rt, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xf, static_cast<Register>(0), rt, imm16), rt, ZERO, ZERO);
+  DsFsmInstr(EmitI(0xf, static_cast<Register>(0), rt, imm16)).GprOuts(rt);
 }
 
 void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) {
   CHECK(IsR6());
-  DsFsmInstrRrr(EmitI(0xf, rs, rt, imm16), rt, rt, rs);
+  DsFsmInstr(EmitI(0xf, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::AddUpper(Register rt, Register rs, uint16_t imm16, Register tmp) {
@@ -871,27 +790,27 @@
 
 void MipsAssembler::Mfhi(Register rd) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x10), rd, ZERO, ZERO);
+  DsFsmInstr(EmitR(0, ZERO, ZERO, rd, 0, 0x10)).GprOuts(rd);
 }
 
 void MipsAssembler::Mflo(Register rd) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x12), rd, ZERO, ZERO);
+  DsFsmInstr(EmitR(0, ZERO, ZERO, rd, 0, 0x12)).GprOuts(rd);
 }
 
 void MipsAssembler::Sb(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x28, rs, rt, imm16), ZERO, rt, rs);
+  DsFsmInstr(EmitI(0x28, rs, rt, imm16)).GprIns(rt, rs);
 }
 
 void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs);
+  DsFsmInstr(EmitI(0x29, rs, rt, imm16)).GprIns(rt, rs);
 }
 
 void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
   if (patcher_label != nullptr) {
     Bind(patcher_label);
   }
-  DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs, patcher_label);
+  DsFsmInstr(EmitI(0x2b, rs, rt, imm16), patcher_label).GprIns(rt, rs);
 }
 
 void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) {
@@ -900,50 +819,50 @@
 
 void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x2a, rs, rt, imm16), ZERO, rt, rs);
+  DsFsmInstr(EmitI(0x2a, rs, rt, imm16)).GprIns(rt, rs);
 }
 
 void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x2e, rs, rt, imm16), ZERO, rt, rs);
+  DsFsmInstr(EmitI(0x2e, rs, rt, imm16)).GprIns(rt, rs);
 }
 
 void MipsAssembler::LlR2(Register rt, Register base, int16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x30, base, rt, imm16), rt, base, base);
+  DsFsmInstr(EmitI(0x30, base, rt, imm16)).GprOuts(rt).GprIns(base);
 }
 
 void MipsAssembler::ScR2(Register rt, Register base, int16_t imm16) {
   CHECK(!IsR6());
-  DsFsmInstrRrr(EmitI(0x38, base, rt, imm16), rt, rt, base);
+  DsFsmInstr(EmitI(0x38, base, rt, imm16)).GprInOuts(rt).GprIns(base);
 }
 
 void MipsAssembler::LlR6(Register rt, Register base, int16_t imm9) {
   CHECK(IsR6());
   CHECK(IsInt<9>(imm9));
-  DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36), rt, base, base);
+  DsFsmInstr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36)).GprOuts(rt).GprIns(base);
 }
 
 void MipsAssembler::ScR6(Register rt, Register base, int16_t imm9) {
   CHECK(IsR6());
   CHECK(IsInt<9>(imm9));
-  DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26), rt, rt, base);
+  DsFsmInstr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26)).GprInOuts(rt).GprIns(base);
 }
 
 void MipsAssembler::Slt(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2a), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x2a)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Sltu(Register rd, Register rs, Register rt) {
-  DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2b), rd, rs, rt);
+  DsFsmInstr(EmitR(0, rs, rt, rd, 0, 0x2b)).GprOuts(rd).GprIns(rs, rt);
 }
 
 void MipsAssembler::Slti(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xa, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0xa, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::Sltiu(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0xb, rs, rt, imm16), rt, rs, rs);
+  DsFsmInstr(EmitI(0xb, rs, rt, imm16)).GprOuts(rt).GprIns(rs);
 }
 
 void MipsAssembler::B(uint16_t imm16) {
@@ -1021,8 +940,8 @@
   uint32_t last_instruction = delay_slot_.instruction_;
   MipsLabel* patcher_label = delay_slot_.patcher_label_;
   bool exchange = (last_instruction != 0 &&
-      (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 &&
-      ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0);
+      (delay_slot_.masks_.gpr_outs_ & (1u << rs)) == 0 &&
+      ((delay_slot_.masks_.gpr_ins_ | delay_slot_.masks_.gpr_outs_) & (1u << rd)) == 0);
   if (exchange) {
     // The last instruction cannot be used in a different delay slot,
     // do not commit the label before it (if any).
@@ -1305,67 +1224,67 @@
 }
 
 void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x0), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x0)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SubS(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x1)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::MulS(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x2), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x2)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::DivS(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x3), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x3)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x0), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x0)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x1)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x2), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x2)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x3), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x3)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SqrtS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::SqrtD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::AbsS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::AbsD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::MovS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::MovD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::NegS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::NegD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::CunS(FRegister fs, FRegister ft) {
@@ -1375,7 +1294,8 @@
 void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CeqS(FRegister fs, FRegister ft) {
@@ -1385,7 +1305,8 @@
 void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CueqS(FRegister fs, FRegister ft) {
@@ -1395,7 +1316,8 @@
 void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::ColtS(FRegister fs, FRegister ft) {
@@ -1405,7 +1327,8 @@
 void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CultS(FRegister fs, FRegister ft) {
@@ -1415,7 +1338,8 @@
 void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::ColeS(FRegister fs, FRegister ft) {
@@ -1425,7 +1349,8 @@
 void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CuleS(FRegister fs, FRegister ft) {
@@ -1435,7 +1360,8 @@
 void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CunD(FRegister fs, FRegister ft) {
@@ -1445,7 +1371,8 @@
 void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CeqD(FRegister fs, FRegister ft) {
@@ -1455,7 +1382,8 @@
 void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CueqD(FRegister fs, FRegister ft) {
@@ -1465,7 +1393,8 @@
 void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::ColtD(FRegister fs, FRegister ft) {
@@ -1475,7 +1404,8 @@
 void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CultD(FRegister fs, FRegister ft) {
@@ -1485,7 +1415,8 @@
 void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::ColeD(FRegister fs, FRegister ft) {
@@ -1495,7 +1426,8 @@
 void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CuleD(FRegister fs, FRegister ft) {
@@ -1505,301 +1437,323 @@
 void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37))
+      .CcOuts(cc).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x01), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x01)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x02), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x02)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x03), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x03)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x04), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x04)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x05), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x05)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x06), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x06)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x07), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x07)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x11), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x11)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x12), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x12)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x13), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x14, ft, fs, fd, 0x13)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x01), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x01)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x02), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x02)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x03), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x03)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x04), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x04)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x05), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x05)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x06), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x06)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x07), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x07)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x11), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x11)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x12), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x12)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x13), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x15, ft, fs, fd, 0x13)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::Movf(Register rd, Register rs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01), rd, rs, cc);
+  DsFsmInstr(EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01))
+      .GprInOuts(rd).GprIns(rs).CcIns(cc);
 }
 
 void MipsAssembler::Movt(Register rd, Register rs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01), rd, rs, cc);
+  DsFsmInstr(EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01))
+      .GprInOuts(rd).GprIns(rs).CcIns(cc);
 }
 
 void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11))
+      .FprInOuts(fd).FprIns(fs).CcIns(cc);
 }
 
 void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11))
+      .FprInOuts(fd).FprIns(fs).CcIns(cc);
 }
 
 void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11),
-                 fd,
-                 fs,
-                 cc);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11))
+      .FprInOuts(fd).FprIns(fs).CcIns(cc);
 }
 
 void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) {
   CHECK(!IsR6());
   CHECK(IsUint<3>(cc)) << cc;
-  DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11),
-                 fd,
-                 fs,
-                 cc);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11))
+      .FprInOuts(fd).FprIns(fs).CcIns(cc);
 }
 
 void MipsAssembler::MovzS(FRegister fd, FRegister fs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x12))
+      .FprInOuts(fd).FprIns(fs).GprIns(rt);
 }
 
 void MipsAssembler::MovzD(FRegister fd, FRegister fs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x12))
+      .FprInOuts(fd).FprIns(fs).GprIns(rt);
 }
 
 void MipsAssembler::MovnS(FRegister fd, FRegister fs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x13))
+      .FprInOuts(fd).FprIns(fs).GprIns(rt);
 }
 
 void MipsAssembler::MovnD(FRegister fd, FRegister fs, Register rt) {
   CHECK(!IsR6());
-  DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x13))
+      .FprInOuts(fd).FprIns(fs).GprIns(rt);
 }
 
 void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFfff(EmitFR(0x11, 0x10, ft, fs, fd, 0x10), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x10)).FprInOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SelD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFfff(EmitFR(0x11, 0x11, ft, fs, fd, 0x10), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x10)).FprInOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SeleqzS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x14), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x14)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SeleqzD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x14), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x14)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SelnezS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x17), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x17)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::SelnezD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x17), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x17)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::ClassS(FRegister fd, FRegister fs) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::ClassD(FRegister fd, FRegister fs) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1c), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x1c)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1c), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x1c)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1e), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x10, ft, fs, fd, 0x1e)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
-  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1e), fd, fs, ft);
+  DsFsmInstr(EmitFR(0x11, 0x11, ft, fs, fd, 0x1e)).FprOuts(fd).FprIns(fs, ft);
 }
 
 void MipsAssembler::TruncLS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::TruncLD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::TruncWS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::TruncWD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtds(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::FloorWS(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf)).FprOuts(fd).FprIns(fs);
 }
 
 void MipsAssembler::FloorWD(FRegister fd, FRegister fs) {
-  DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs);
+  DsFsmInstr(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf)).FprOuts(fd).FprIns(fs);
+}
+
+FRegister MipsAssembler::GetFpuRegLow(FRegister reg) {
+  // If FPRs are 32-bit (and get paired to hold 64-bit values), an access to
+  // an odd-numbered FPR is attributed to the even-numbered FPR of its pair.
+  // This lets us use only even-numbered FPRs irrespective of whether we're
+  // doing single- or double-precision arithmetic. (We don't use odd-numbered
+  // 32-bit FPRs to hold single-precision values.)
+  return Is32BitFPU() ? static_cast<FRegister>(reg & ~1u) : reg;
 }
 
 void MipsAssembler::Mfc1(Register rt, FRegister fs) {
-  DsFsmInstrRf(EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
-               rt,
-               fs);
+  DsFsmInstr(EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0))
+      .GprOuts(rt).FprIns(GetFpuRegLow(fs));
 }
 
+// Note, the 32 LSBs of a 64-bit value must be loaded into an FPR before the 32 MSBs
+// when loading the value as 32-bit halves.
 void MipsAssembler::Mtc1(Register rt, FRegister fs) {
-  DsFsmInstrFr(EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
-               fs,
-               rt);
+  uint32_t encoding =
+      EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0);
+  if (Is32BitFPU() && (fs % 2 != 0)) {
+    // If mtc1 is used to simulate mthc1 by writing to the odd-numbered FPR in
+    // a pair of 32-bit FPRs, the associated even-numbered FPR is an in/out.
+    DsFsmInstr(encoding).FprInOuts(GetFpuRegLow(fs)).GprIns(rt);
+  } else {
+    // Otherwise (the FPR is 64-bit or even-numbered), the FPR is an out.
+    DsFsmInstr(encoding).FprOuts(fs).GprIns(rt);
+  }
 }
 
 void MipsAssembler::Mfhc1(Register rt, FRegister fs) {
-  DsFsmInstrRf(EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
-               rt,
-               fs);
+  DsFsmInstr(EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0))
+      .GprOuts(rt).FprIns(fs);
 }
 
+// Note, the 32 LSBs of a 64-bit value must be loaded into an FPR before the 32 MSBs
+// when loading the value as 32-bit halves.
 void MipsAssembler::Mthc1(Register rt, FRegister fs) {
-  DsFsmInstrFr(EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0),
-               fs,
-               rt);
+  DsFsmInstr(EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0))
+      .FprInOuts(fs).GprIns(rt);
 }
 
 void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) {
@@ -1820,20 +1774,30 @@
   }
 }
 
+// Note, the 32 LSBs of a 64-bit value must be loaded into an FPR before the 32 MSBs
+// when loading the value as 32-bit halves.
 void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) {
-  DsFsmInstrFr(EmitI(0x31, rs, static_cast<Register>(ft), imm16), ft, rs);
+  uint32_t encoding = EmitI(0x31, rs, static_cast<Register>(ft), imm16);
+  if (Is32BitFPU() && (ft % 2 != 0)) {
+    // If lwc1 is used to load the odd-numbered FPR in a pair of 32-bit FPRs,
+    // the associated even-numbered FPR is an in/out.
+    DsFsmInstr(encoding).FprInOuts(GetFpuRegLow(ft)).GprIns(rs);
+  } else {
+    // Otherwise (the FPR is 64-bit or even-numbered), the FPR is an out.
+    DsFsmInstr(encoding).FprOuts(ft).GprIns(rs);
+  }
 }
 
 void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) {
-  DsFsmInstrFr(EmitI(0x35, rs, static_cast<Register>(ft), imm16), ft, rs);
+  DsFsmInstr(EmitI(0x35, rs, static_cast<Register>(ft), imm16)).FprOuts(ft).GprIns(rs);
 }
 
 void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) {
-  DsFsmInstrFR(EmitI(0x39, rs, static_cast<Register>(ft), imm16), ft, rs);
+  DsFsmInstr(EmitI(0x39, rs, static_cast<Register>(ft), imm16)).FprIns(GetFpuRegLow(ft)).GprIns(rs);
 }
 
 void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) {
-  DsFsmInstrFR(EmitI(0x3d, rs, static_cast<Register>(ft), imm16), ft, rs);
+  DsFsmInstr(EmitI(0x3d, rs, static_cast<Register>(ft), imm16)).FprIns(ft).GprIns(rs);
 }
 
 void MipsAssembler::Break() {
@@ -1882,1447 +1846,951 @@
 
 void MipsAssembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::Ffint_sD(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::Ftint_sW(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::Ftint_sD(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(shamt3)) << shamt3;
-  DsFsmInstrFff(EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(shamt4)) << shamt4;
-  DsFsmInstrFff(EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) {
   CHECK(HasMsa());
   CHECK(IsUint<5>(shamt5)) << shamt5;
-  DsFsmInstrFff(EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) {
   CHECK(HasMsa());
   CHECK(IsUint<6>(shamt6)) << shamt6;
-  DsFsmInstrFff(EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(shamt3)) << shamt3;
-  DsFsmInstrFff(EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(shamt4)) << shamt4;
-  DsFsmInstrFff(EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) {
   CHECK(HasMsa());
   CHECK(IsUint<5>(shamt5)) << shamt5;
-  DsFsmInstrFff(EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) {
   CHECK(HasMsa());
   CHECK(IsUint<6>(shamt6)) << shamt6;
-  DsFsmInstrFff(EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(shamt3)) << shamt3;
-  DsFsmInstrFff(EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(shamt4)) << shamt4;
-  DsFsmInstrFff(EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) {
   CHECK(HasMsa());
   CHECK(IsUint<5>(shamt5)) << shamt5;
-  DsFsmInstrFff(EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) {
   CHECK(HasMsa());
   CHECK(IsUint<6>(shamt6)) << shamt6;
-  DsFsmInstrFff(EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::MoveV(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(n4)) << n4;
-  DsFsmInstrFff(EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(n3)) << n3;
-  DsFsmInstrFff(EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) {
   CHECK(HasMsa());
   CHECK(IsUint<2>(n2)) << n2;
-  DsFsmInstrFff(EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) {
   CHECK(HasMsa());
   CHECK(IsUint<1>(n1)) << n1;
-  DsFsmInstrFff(EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19)).FprOuts(wd).FprIns(ws);
 }
 
 void MipsAssembler::Copy_sB(Register rd, VectorRegister ws, int n4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(n4)) << n4;
-  DsFsmInstrRf(EmitMsaELM(0x2, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19),
-               rd,
-               static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x2, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19))
+      .GprOuts(rd).FprIns(ws);
 }
 
 void MipsAssembler::Copy_sH(Register rd, VectorRegister ws, int n3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(n3)) << n3;
-  DsFsmInstrRf(EmitMsaELM(0x2, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19),
-               rd,
-               static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x2, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19))
+      .GprOuts(rd).FprIns(ws);
 }
 
 void MipsAssembler::Copy_sW(Register rd, VectorRegister ws, int n2) {
   CHECK(HasMsa());
   CHECK(IsUint<2>(n2)) << n2;
-  DsFsmInstrRf(EmitMsaELM(0x2, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19),
-               rd,
-               static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x2, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19))
+      .GprOuts(rd).FprIns(ws);
 }
 
 void MipsAssembler::Copy_uB(Register rd, VectorRegister ws, int n4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(n4)) << n4;
-  DsFsmInstrRf(EmitMsaELM(0x3, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19),
-               rd,
-               static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x3, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19))
+      .GprOuts(rd).FprIns(ws);
 }
 
 void MipsAssembler::Copy_uH(Register rd, VectorRegister ws, int n3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(n3)) << n3;
-  DsFsmInstrRf(EmitMsaELM(0x3, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19),
-               rd,
-               static_cast<FRegister>(ws));
+  DsFsmInstr(EmitMsaELM(0x3, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19))
+      .GprOuts(rd).FprIns(ws);
 }
 
 void MipsAssembler::InsertB(VectorRegister wd, Register rs, int n4) {
   CHECK(HasMsa());
   CHECK(IsUint<4>(n4)) << n4;
-  DsFsmInstrFffr(EmitMsaELM(0x4, n4 | kMsaDfNByteMask, static_cast<VectorRegister>(rs), wd, 0x19),
-                 static_cast<FRegister>(wd),
-                 static_cast<FRegister>(wd),
-                 rs);
+  DsFsmInstr(EmitMsaELM(0x4, n4 | kMsaDfNByteMask, static_cast<VectorRegister>(rs), wd, 0x19))
+      .FprInOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::InsertH(VectorRegister wd, Register rs, int n3) {
   CHECK(HasMsa());
   CHECK(IsUint<3>(n3)) << n3;
-  DsFsmInstrFffr(
-      EmitMsaELM(0x4, n3 | kMsaDfNHalfwordMask, static_cast<VectorRegister>(rs), wd, 0x19),
-      static_cast<FRegister>(wd),
-      static_cast<FRegister>(wd),
-      rs);
+  DsFsmInstr(EmitMsaELM(0x4, n3 | kMsaDfNHalfwordMask, static_cast<VectorRegister>(rs), wd, 0x19))
+      .FprInOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::InsertW(VectorRegister wd, Register rs, int n2) {
   CHECK(HasMsa());
   CHECK(IsUint<2>(n2)) << n2;
-  DsFsmInstrFffr(EmitMsaELM(0x4, n2 | kMsaDfNWordMask, static_cast<VectorRegister>(rs), wd, 0x19),
-                 static_cast<FRegister>(wd),
-                 static_cast<FRegister>(wd),
-                 rs);
+  DsFsmInstr(EmitMsaELM(0x4, n2 | kMsaDfNWordMask, static_cast<VectorRegister>(rs), wd, 0x19))
+      .FprInOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::FillB(VectorRegister wd, Register rs) {
   CHECK(HasMsa());
-  DsFsmInstrFr(EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::FillH(VectorRegister wd, Register rs) {
   CHECK(HasMsa());
-  DsFsmInstrFr(EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::FillW(VectorRegister wd, Register rs) {
   CHECK(HasMsa());
-  DsFsmInstrFr(EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::LdiB(VectorRegister wd, int imm8) {
   CHECK(HasMsa());
   CHECK(IsInt<8>(imm8)) << imm8;
-  DsFsmInstrFr(EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7),
-               static_cast<FRegister>(wd),
-               ZERO);
+  DsFsmInstr(EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7)).FprOuts(wd);
 }
 
 void MipsAssembler::LdiH(VectorRegister wd, int imm10) {
   CHECK(HasMsa());
   CHECK(IsInt<10>(imm10)) << imm10;
-  DsFsmInstrFr(EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7),
-               static_cast<FRegister>(wd),
-               ZERO);
+  DsFsmInstr(EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7)).FprOuts(wd);
 }
 
 void MipsAssembler::LdiW(VectorRegister wd, int imm10) {
   CHECK(HasMsa());
   CHECK(IsInt<10>(imm10)) << imm10;
-  DsFsmInstrFr(EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7),
-               static_cast<FRegister>(wd),
-               ZERO);
+  DsFsmInstr(EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7)).FprOuts(wd);
 }
 
 void MipsAssembler::LdiD(VectorRegister wd, int imm10) {
   CHECK(HasMsa());
   CHECK(IsInt<10>(imm10)) << imm10;
-  DsFsmInstrFr(EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7),
-               static_cast<FRegister>(wd),
-               ZERO);
+  DsFsmInstr(EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7)).FprOuts(wd);
 }
 
 void MipsAssembler::LdB(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<10>(offset)) << offset;
-  DsFsmInstrFr(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0)).FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::LdH(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<11>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsHalfwordSize);
-  DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::LdW(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<12>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsWordSize);
-  DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::LdD(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<13>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsDoublewordSize);
-  DsFsmInstrFr(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3))
+      .FprOuts(wd).GprIns(rs);
 }
 
 void MipsAssembler::StB(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<10>(offset)) << offset;
-  DsFsmInstrFR(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0), static_cast<FRegister>(wd), rs);
+  DsFsmInstr(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0)).FprIns(wd).GprIns(rs);
 }
 
 void MipsAssembler::StH(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<11>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsHalfwordSize);
-  DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1))
+      .FprIns(wd).GprIns(rs);
 }
 
 void MipsAssembler::StW(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<12>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsWordSize);
-  DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2))
+      .FprIns(wd).GprIns(rs);
 }
 
 void MipsAssembler::StD(VectorRegister wd, Register rs, int offset) {
   CHECK(HasMsa());
   CHECK(IsInt<13>(offset)) << offset;
   CHECK_ALIGNED(offset, kMipsDoublewordSize);
-  DsFsmInstrFR(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3),
-               static_cast<FRegister>(wd),
-               rs);
+  DsFsmInstr(EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3))
+      .FprIns(wd).GprIns(rs);
 }
 
 void MipsAssembler::IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x14),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x14)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MaddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MaddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MsubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MsubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MsubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::MsubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x12),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x12)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Asub_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x11),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x11)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x0, wt, ws, wd, 0x1b)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x1, wt, ws, wd, 0x1b)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x2, wt, ws, wd, 0x1b)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x1b),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x1b)).FprInOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
-  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15),
-                static_cast<FRegister>(wd),
-                static_cast<FRegister>(ws),
-                static_cast<FRegister>(wt));
+  DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt);
 }
 
 void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst,
@@ -4144,7 +3612,7 @@
     case kLongCall:
       // Instructions depending on or modifying RA should not be moved into delay slots
       // of branches modifying RA.
-      return ((delay_slot.gpr_ins_mask_ | delay_slot.gpr_outs_mask_) & (1u << RA)) == 0;
+      return ((delay_slot.masks_.gpr_ins_ | delay_slot.masks_.gpr_outs_) & (1u << RA)) == 0;
 
     // R2 conditional branches.
     case kCondBranch:
@@ -4157,17 +3625,17 @@
         case kCondGTZ:
         case kCondEQZ:
         case kCondNEZ:
-          return (delay_slot.gpr_outs_mask_ & (1u << lhs_reg_)) == 0;
+          return (delay_slot.masks_.gpr_outs_ & (1u << lhs_reg_)) == 0;
 
         // Branches with two GPR sources.
         case kCondEQ:
         case kCondNE:
-          return (delay_slot.gpr_outs_mask_ & ((1u << lhs_reg_) | (1u << rhs_reg_))) == 0;
+          return (delay_slot.masks_.gpr_outs_ & ((1u << lhs_reg_) | (1u << rhs_reg_))) == 0;
 
         // Branches with one FPU condition code source.
         case kCondF:
         case kCondT:
-          return (delay_slot.cc_outs_mask_ & (1u << lhs_reg_)) == 0;
+          return (delay_slot.masks_.cc_outs_ & (1u << lhs_reg_)) == 0;
 
         default:
           // We don't support synthetic R2 branches (preceded with slt[u]) at this level
@@ -4192,7 +3660,7 @@
         // Branches with one FPU register source.
         case kCondF:
         case kCondT:
-          return (delay_slot.fpr_outs_mask_ & (1u << lhs_reg_)) == 0;
+          return (delay_slot.masks_.fpr_outs_ & (1u << lhs_reg_)) == 0;
         // Others have a forbidden slot instead of a delay slot.
         default:
           return false;
@@ -4858,8 +4326,8 @@
   // Likewise, if the instruction depends on AT, it can't be exchanged with slt[u]
   // because slt[u] changes AT.
   return (delay_slot_.instruction_ != 0 &&
-      (delay_slot_.gpr_outs_mask_ & ((1u << AT) | (1u << rs) | (1u << rt))) == 0 &&
-      (delay_slot_.gpr_ins_mask_ & (1u << AT)) == 0);
+      (delay_slot_.masks_.gpr_outs_ & ((1u << AT) | (1u << rs) | (1u << rt))) == 0 &&
+      (delay_slot_.masks_.gpr_ins_ & (1u << AT)) == 0);
 }
 
 void MipsAssembler::ExchangeWithSlt(const DelaySlot& forwarded_slot) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1c3097a..7de8e2e 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -74,6 +74,81 @@
   kPositiveZero      = 0x200,
 };
 
+// Instruction description in terms of input and output registers.
+// Used for instruction reordering.
+struct InOutRegMasks {
+  InOutRegMasks()
+      : gpr_outs_(0), gpr_ins_(0), fpr_outs_(0), fpr_ins_(0), cc_outs_(0), cc_ins_(0) {}
+
+  inline InOutRegMasks& GprOuts(Register reg) {
+    gpr_outs_ |= (1u << reg);
+    gpr_outs_ &= ~1u;  // Ignore register ZERO.
+    return *this;
+  }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& GprOuts(T one, Ts... more) { GprOuts(one); GprOuts(more...); return *this; }
+
+  inline InOutRegMasks& GprIns(Register reg) {
+    gpr_ins_ |= (1u << reg);
+    gpr_ins_ &= ~1u;  // Ignore register ZERO.
+    return *this;
+  }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& GprIns(T one, Ts... more) { GprIns(one); GprIns(more...); return *this; }
+
+  inline InOutRegMasks& GprInOuts(Register reg) { GprIns(reg); GprOuts(reg); return *this; }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& GprInOuts(T one, Ts... more) {
+    GprInOuts(one);
+    GprInOuts(more...);
+    return *this;
+  }
+
+  inline InOutRegMasks& FprOuts(FRegister reg) { fpr_outs_ |= (1u << reg); return *this; }
+  inline InOutRegMasks& FprOuts(VectorRegister reg) { return FprOuts(static_cast<FRegister>(reg)); }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& FprOuts(T one, Ts... more) { FprOuts(one); FprOuts(more...); return *this; }
+
+  inline InOutRegMasks& FprIns(FRegister reg) { fpr_ins_ |= (1u << reg); return *this; }
+  inline InOutRegMasks& FprIns(VectorRegister reg) { return FprIns(static_cast<FRegister>(reg)); }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& FprIns(T one, Ts... more) { FprIns(one); FprIns(more...); return *this; }
+
+  inline InOutRegMasks& FprInOuts(FRegister reg) { FprIns(reg); FprOuts(reg); return *this; }
+  inline InOutRegMasks& FprInOuts(VectorRegister reg) {
+    return FprInOuts(static_cast<FRegister>(reg));
+  }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& FprInOuts(T one, Ts... more) {
+    FprInOuts(one);
+    FprInOuts(more...);
+    return *this;
+  }
+
+  inline InOutRegMasks& CcOuts(int cc) { cc_outs_ |= (1u << cc); return *this; }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& CcOuts(T one, Ts... more) { CcOuts(one); CcOuts(more...); return *this; }
+
+  inline InOutRegMasks& CcIns(int cc) { cc_ins_ |= (1u << cc); return *this; }
+  template<typename T, typename... Ts>
+  inline InOutRegMasks& CcIns(T one, Ts... more) { CcIns(one); CcIns(more...); return *this; }
+
+  // Mask of output GPRs for the instruction.
+  uint32_t gpr_outs_;
+  // Mask of input GPRs for the instruction.
+  uint32_t gpr_ins_;
+  // Mask of output FPRs for the instruction.
+  uint32_t fpr_outs_;
+  // Mask of input FPRs for the instruction.
+  uint32_t fpr_ins_;
+  // Mask of output FPU condition code flags for the instruction.
+  uint32_t cc_outs_;
+  // Mask of input FPU condition code flags for the instruction.
+  uint32_t cc_ins_;
+
+  // TODO: add LO and HI.
+};
+
 class MipsLabel : public Label {
  public:
   MipsLabel() : prev_branch_id_plus_one_(0) {}
@@ -462,6 +537,16 @@
   void FloorWS(FRegister fd, FRegister fs);
   void FloorWD(FRegister fd, FRegister fs);
 
+  // Note, the 32 LSBs of a 64-bit value must be loaded into an FPR before the 32 MSBs
+  // when loading the value as 32-bit halves. This applies to all 32-bit FPR loads:
+  // Mtc1(), Mthc1(), MoveToFpuHigh(), Lwc1(). Even if you need two Mtc1()'s or two
+  // Lwc1()'s to load a pair of 32-bit FPRs and these loads do not interfere with one
+  // another (unlike Mtc1() and Mthc1() with 64-bit FPRs), maintain the order:
+  // low then high.
+  //
+  // Also, prefer MoveFromFpuHigh()/MoveToFpuHigh() over Mfhc1()/Mthc1() and Mfc1()/Mtc1().
+  // This will save you some if statements.
+  FRegister GetFpuRegLow(FRegister reg);
   void Mfc1(Register rt, FRegister fs);
   void Mtc1(Register rt, FRegister fs);
   void Mfhc1(Register rt, FRegister fs);
@@ -1337,23 +1422,13 @@
   // Used to make the decision of moving the instruction into a delay slot.
   struct DelaySlot {
     DelaySlot();
+
     // Encoded instruction that may be used to fill the delay slot or 0
     // (0 conveniently represents NOP).
     uint32_t instruction_;
-    // Mask of output GPRs for the instruction.
-    uint32_t gpr_outs_mask_;
-    // Mask of input GPRs for the instruction.
-    uint32_t gpr_ins_mask_;
-    // Mask of output FPRs for the instruction.
-    uint32_t fpr_outs_mask_;
-    // Mask of input FPRs for the instruction.
-    uint32_t fpr_ins_mask_;
-    // Mask of output FPU condition code flags for the instruction.
-    uint32_t cc_outs_mask_;
-    // Mask of input FPU condition code flags for the instruction.
-    uint32_t cc_ins_mask_;
-    // Branches never operate on the LO and HI registers, hence there's
-    // no mask for LO and HI.
+
+    // Input/output register masks.
+    InOutRegMasks masks_;
 
     // Label for patchable instructions to allow moving them into delay slots.
     MipsLabel* patcher_label_;
@@ -1646,30 +1721,8 @@
   void FinalizeLabeledBranch(MipsLabel* label);
 
   // Various helpers for branch delay slot management.
-  void DsFsmInstr(uint32_t instruction,
-                  uint32_t gpr_outs_mask,
-                  uint32_t gpr_ins_mask,
-                  uint32_t fpr_outs_mask,
-                  uint32_t fpr_ins_mask,
-                  uint32_t cc_outs_mask,
-                  uint32_t cc_ins_mask,
-                  MipsLabel* patcher_label = nullptr);
+  InOutRegMasks& DsFsmInstr(uint32_t instruction, MipsLabel* patcher_label = nullptr);
   void DsFsmInstrNop(uint32_t instruction);
-  void DsFsmInstrRrr(uint32_t instruction,
-                     Register out,
-                     Register in1,
-                     Register in2,
-                     MipsLabel* patcher_label = nullptr);
-  void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
-  void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
-  void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
-  void DsFsmInstrFffr(uint32_t instruction, FRegister in1_out, FRegister in2, Register in3);
-  void DsFsmInstrRf(uint32_t instruction, Register out, FRegister in);
-  void DsFsmInstrFr(uint32_t instruction, FRegister out, Register in);
-  void DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2);
-  void DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2);
-  void DsFsmInstrRrrc(uint32_t instruction, Register in1_out, Register in2, int cc_in);
-  void DsFsmInstrFffc(uint32_t instruction, FRegister in1_out, FRegister in2, int cc_in);
   void DsFsmLabel();
   void DsFsmCommitLabel();
   void DsFsmDropLabel();
diff --git a/compiler/utils/mips/constants_mips.h b/compiler/utils/mips/constants_mips.h
index b4dfdbd..016c0db 100644
--- a/compiler/utils/mips/constants_mips.h
+++ b/compiler/utils/mips/constants_mips.h
@@ -19,8 +19,9 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "arch/mips/registers_mips.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
index bc8e40b..310f23c 100644
--- a/compiler/utils/mips64/constants_mips64.h
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -19,8 +19,9 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "arch/mips64/registers_mips64.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index 12d113d..1f9ad42 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -22,7 +22,6 @@
 #include <numeric>
 
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "thread-current-inl.h"
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 2280f8b..76df527 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -24,7 +24,8 @@
 #include <set>
 #include <vector>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "base/mutex.h"
 
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h
index 0da30fe..441ef8e 100644
--- a/compiler/utils/test_dex_file_builder.h
+++ b/compiler/utils/test_dex_file_builder.h
@@ -24,8 +24,9 @@
 #include <set>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "dex_file_loader.h"
 #include "standard_dex_file.h"
 
diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h
index 0bc1560..2e03b9f 100644
--- a/compiler/utils/x86/constants_x86.h
+++ b/compiler/utils/x86/constants_x86.h
@@ -19,8 +19,9 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "arch/x86/registers_x86.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index cc508a1..2af3e7b 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -19,8 +19,9 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "arch/x86_64/registers_x86_64.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 4709fd0..ee1d7c6 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -158,11 +158,10 @@
     while (it.HasNextDirectMethod()) {
       ArtMethod* resolved_method =
           class_linker_->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-              *primary_dex_file_,
               it.GetMemberIndex(),
               dex_cache_handle,
               class_loader_handle,
-              nullptr,
+              /* referrer */ nullptr,
               it.GetMethodInvokeType(*class_def));
       CHECK(resolved_method != nullptr);
       if (method_name == resolved_method->GetName()) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 8137fb1..7cb04f2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -345,7 +345,7 @@
              CompilerOptions::kDefaultInlineMaxCodeUnits);
   UsageError("      Default: %d", CompilerOptions::kDefaultInlineMaxCodeUnits);
   UsageError("");
-  UsageError("  --dump-timing: display a breakdown of where time was spent");
+  UsageError("  --dump-timings: display a breakdown of where time was spent");
   UsageError("");
   UsageError("  -g");
   UsageError("  --generate-debug-info: Generate debug information for native debugging,");
@@ -628,10 +628,6 @@
       opened_dex_files_maps_(),
       opened_dex_files_(),
       no_inline_from_dex_files_(),
-      dump_stats_(false),
-      dump_passes_(false),
-      dump_timing_(false),
-      dump_slow_timing_(kIsDebugBuild),
       avoid_storing_invocation_(false),
       swap_fd_(kInvalidFd),
       app_image_fd_(kInvalidFd),
@@ -1221,9 +1217,6 @@
     }
 
     AssignTrueIfExists(args, M::Host, &is_host_);
-    AssignTrueIfExists(args, M::DumpTiming, &dump_timing_);
-    AssignTrueIfExists(args, M::DumpPasses, &dump_passes_);
-    AssignTrueIfExists(args, M::DumpStats, &dump_stats_);
     AssignTrueIfExists(args, M::AvoidStoringInvocation, &avoid_storing_invocation_);
     AssignTrueIfExists(args, M::MultiImage, &multi_image_);
 
@@ -1726,7 +1719,6 @@
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
 
     TimingLogger::ScopedTiming t("dex2oat Compile", timings_);
-    compiler_phases_timings_.reset(new CumulativeLogger("compilation times"));
 
     // Find the dex files we should not inline from.
     std::vector<std::string> no_inline_filters;
@@ -1787,9 +1779,6 @@
                                      compiled_classes_.release(),
                                      compiled_methods_.release(),
                                      thread_count_,
-                                     dump_stats_,
-                                     dump_passes_,
-                                     compiler_phases_timings_.get(),
                                      swap_fd_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
@@ -2202,12 +2191,10 @@
   }
 
   void DumpTiming() {
-    if (dump_timing_ || (dump_slow_timing_ && timings_->GetTotalNs() > MsToNs(1000))) {
+    if (compiler_options_->GetDumpTimings() ||
+        (kIsDebugBuild && timings_->GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(*timings_);
     }
-    if (dump_passes_) {
-      LOG(INFO) << Dumpable<CumulativeLogger>(*driver_->GetTimingsLogger());
-    }
   }
 
   bool IsImage() const {
@@ -2827,7 +2814,7 @@
   // Dex files we are compiling, does not include the class path dex files.
   std::vector<const DexFile*> dex_files_;
   std::string no_inline_from_string_;
-  CompactDexLevel compact_dex_level_ = CompactDexLevel::kCompactDexLevelNone;
+  CompactDexLevel compact_dex_level_ = kDefaultCompactDexLevel;
 
   std::vector<std::unique_ptr<linker::ElfWriter>> elf_writers_;
   std::vector<std::unique_ptr<linker::OatWriter>> oat_writers_;
@@ -2842,10 +2829,6 @@
   // Note that this might contain pointers owned by class_loader_context_.
   std::vector<const DexFile*> no_inline_from_dex_files_;
 
-  bool dump_stats_;
-  bool dump_passes_;
-  bool dump_timing_;
-  bool dump_slow_timing_;
   bool avoid_storing_invocation_;
   std::string swap_file_name_;
   int swap_fd_;
@@ -2858,7 +2841,6 @@
   int profile_file_fd_;
   std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
   TimingLogger* timings_;
-  std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
   std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
   std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
 
diff --git a/dex2oat/dex2oat_image_test.cc b/dex2oat/dex2oat_image_test.cc
index a02fbf8..035b395 100644
--- a/dex2oat/dex2oat_image_test.cc
+++ b/dex2oat/dex2oat_image_test.cc
@@ -22,10 +22,11 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#include <android-base/logging.h>
+
 #include "common_runtime_test.h"
 
 #include "base/file_utils.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
diff --git a/dex2oat/dex2oat_options.cc b/dex2oat/dex2oat_options.cc
index 7f177b9..d9b4ea7 100644
--- a/dex2oat/dex2oat_options.cc
+++ b/dex2oat/dex2oat_options.cc
@@ -220,12 +220,6 @@
           .IntoKey(M::Backend)
       .Define("--host")
           .IntoKey(M::Host)
-      .Define("--dump-timing")
-          .IntoKey(M::DumpTiming)
-      .Define("--dump-passes")
-          .IntoKey(M::DumpPasses)
-      .Define("--dump-stats")
-          .IntoKey(M::DumpStats)
       .Define("--avoid-storing-invocation")
           .IntoKey(M::AvoidStoringInvocation)
       .Define("--very-large-app-threshold=_")
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index ad287b0..8805aa1 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -22,11 +22,11 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "common_runtime_test.h"
 
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "bytecode_utils.h"
diff --git a/dex2oat/linker/elf_writer_quick.cc b/dex2oat/linker/elf_writer_quick.cc
index b139a12..aa64b7d 100644
--- a/dex2oat/linker/elf_writer_quick.cc
+++ b/dex2oat/linker/elf_writer_quick.cc
@@ -20,8 +20,9 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include <android-base/logging.h>
+
 #include "base/casts.h"
-#include "base/logging.h"
 #include "compiled_method.h"
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
@@ -66,7 +67,7 @@
   void Run(Thread*) {
     result_ = debug::MakeMiniDebugInfo(isa_,
                                        instruction_set_features_,
-                                       rodata_section_size_,
+                                       kPageSize + rodata_section_size_,  // .text address.
                                        text_section_size_,
                                        method_infos_);
   }
@@ -172,6 +173,7 @@
 void ElfWriterQuick<ElfTypes>::Start() {
   builder_->Start();
   if (compiler_options_->GetGenerateBuildId()) {
+    builder_->GetBuildId()->AllocateVirtualMemory(builder_->GetBuildId()->GetSize());
     builder_->WriteBuildIdSection();
   }
 }
@@ -224,9 +226,6 @@
 
 template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
-  if (bss_size_ != 0u) {
-    builder_->GetBss()->WriteNoBitsSection(bss_size_);
-  }
   if (builder_->GetIsa() == InstructionSet::kMips ||
       builder_->GetIsa() == InstructionSet::kMips64) {
     builder_->WriteMIPSabiflagsSection();
diff --git a/dex2oat/linker/image_test.h b/dex2oat/linker/image_test.h
index cedbccf..85145d3 100644
--- a/dex2oat/linker/image_test.h
+++ b/dex2oat/linker/image_test.h
@@ -293,14 +293,7 @@
       bool image_space_ok = writer->PrepareImageAddressSpace();
       ASSERT_TRUE(image_space_ok);
 
-      for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-        std::unique_ptr<BufferedOutputStream> vdex_out =
-            std::make_unique<BufferedOutputStream>(
-                std::make_unique<FileOutputStream>(vdex_files[i].GetFile()));
-        oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
-        oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
-      }
-
+      DCHECK_EQ(vdex_files.size(), oat_files.size());
       for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
         MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
                                         driver->GetInstructionSetFeatures());
@@ -308,6 +301,14 @@
         ElfWriter* const elf_writer = elf_writers[i].get();
         std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
         oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+
+        std::unique_ptr<BufferedOutputStream> vdex_out =
+            std::make_unique<BufferedOutputStream>(
+                std::make_unique<FileOutputStream>(vdex_files[i].GetFile()));
+        oat_writer->WriteVerifierDeps(vdex_out.get(), nullptr);
+        oat_writer->WriteQuickeningInfo(vdex_out.get());
+        oat_writer->WriteChecksumsAndVdexHeader(vdex_out.get());
+
         oat_writer->PrepareLayout(&patcher);
         size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
         size_t text_size = oat_writer->GetOatSize() - rodata_size;
diff --git a/dex2oat/linker/image_writer.cc b/dex2oat/linker/image_writer.cc
index 68c9f80..738bbf8 100644
--- a/dex2oat/linker/image_writer.cc
+++ b/dex2oat/linker/image_writer.cc
@@ -29,7 +29,7 @@
 #include "art_method-inl.h"
 #include "base/callee_save_type.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "compiled_method.h"
@@ -365,7 +365,7 @@
 
   size_t oat_index = GetOatIndex(object);
   ImageInfo& image_info = GetImageInfo(oat_index);
-  size_t bin_slot_offset = image_info.bin_slot_offsets_[bin_slot.GetBin()];
+  size_t bin_slot_offset = image_info.GetBinSlotOffset(bin_slot.GetBin());
   size_t new_offset = bin_slot_offset + bin_slot.GetIndex();
   DCHECK_ALIGNED(new_offset, kObjectAlignment);
 
@@ -436,9 +436,10 @@
     auto it = dex_file_oat_index_map_.find(dex_file);
     DCHECK(it != dex_file_oat_index_map_.end()) << dex_file->GetLocation();
     ImageInfo& image_info = GetImageInfo(it->second);
-    image_info.dex_cache_array_starts_.Put(dex_file, image_info.bin_slot_sizes_[kBinDexCacheArray]);
+    image_info.dex_cache_array_starts_.Put(
+        dex_file, image_info.GetBinSlotSize(Bin::kDexCacheArray));
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
-    image_info.bin_slot_sizes_[kBinDexCacheArray] += layout.Size();
+    image_info.IncrementBinSlotSize(Bin::kDexCacheArray, layout.Size());
   }
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -494,7 +495,7 @@
     DCHECK(!IsInBootImage(array));
     size_t oat_index = GetOatIndexForDexCache(dex_cache);
     native_object_relocations_.emplace(array,
-        NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeDexCacheArray });
+        NativeObjectRelocation { oat_index, offset, NativeObjectRelocationType::kDexCacheArray });
   }
 }
 
@@ -512,7 +513,7 @@
   }
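-  // kBinArtMethodClean picked arbitrarily, just required to differentiate between ArtFields and
+  // Bin::kArtMethodClean picked arbitrarily, just required to differentiate between ArtFields and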
   // ArtMethods.
-  pointer_arrays_.emplace(arr, kBinArtMethodClean);
+  pointer_arrays_.emplace(arr, Bin::kArtMethodClean);
 }
 
 void ImageWriter::AssignImageBinSlot(mirror::Object* object, size_t oat_index) {
@@ -528,8 +529,7 @@
   //
   // This means more pages will stay either clean or shared dirty (with zygote) and
   // the app will use less of its own (private) memory.
-  Bin bin = kBinRegular;
-  size_t current_offset = 0u;
+  Bin bin = Bin::kRegular;
 
   if (kBinObjects) {
     //
@@ -563,7 +563,7 @@
     // so packing them together will not result in a noticeably tighter dirty-to-clean ratio.
     //
     if (object->IsClass()) {
-      bin = kBinClassVerified;
+      bin = Bin::kClassVerified;
       mirror::Class* klass = object->AsClass();
 
       // Add non-embedded vtable to the pointer array table if there is one.
@@ -584,15 +584,15 @@
       //   - classes with dirty static fields.
       if (dirty_image_objects_ != nullptr &&
           dirty_image_objects_->find(klass->PrettyDescriptor()) != dirty_image_objects_->end()) {
-        bin = kBinKnownDirty;
+        bin = Bin::kKnownDirty;
       } else if (klass->GetStatus() == Class::kStatusInitialized) {
-        bin = kBinClassInitialized;
+        bin = Bin::kClassInitialized;
 
         // If the class's static fields are all final, put it into a separate bin
         // since it's very likely it will stay clean.
         uint32_t num_static_fields = klass->NumStaticFields();
         if (num_static_fields == 0) {
-          bin = kBinClassInitializedFinalStatics;
+          bin = Bin::kClassInitializedFinalStatics;
         } else {
           // Maybe all the statics are final?
           bool all_final = true;
@@ -605,20 +605,20 @@
           }
 
           if (all_final) {
-            bin = kBinClassInitializedFinalStatics;
+            bin = Bin::kClassInitializedFinalStatics;
           }
         }
       }
     } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
-      bin = kBinString;  // Strings are almost always immutable (except for object header).
+      bin = Bin::kString;  // Strings are almost always immutable (except for object header).
     } else if (object->GetClass<kVerifyNone>() ==
         Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kJavaLangObject)) {
       // Instance of java lang object, probably a lock object. This means it will be dirty when we
       // synchronize on it.
-      bin = kBinMiscDirty;
+      bin = Bin::kMiscDirty;
     } else if (object->IsDexCache()) {
       // Dex file field becomes dirty when the image is loaded.
-      bin = kBinMiscDirty;
+      bin = Bin::kMiscDirty;
     }
-    // else bin = kBinRegular
+    // else bin = Bin::kRegular
   }
@@ -630,14 +630,15 @@
   ImageInfo& image_info = GetImageInfo(oat_index);
 
   size_t offset_delta = RoundUp(object_size, kObjectAlignment);  // 64-bit alignment
-  current_offset = image_info.bin_slot_sizes_[bin];  // How many bytes the current bin is at (aligned).
+  // How many bytes the current bin is at (aligned).
+  size_t current_offset = image_info.GetBinSlotSize(bin);
   // Move the current bin size up to accommodate the object we just assigned a bin slot.
-  image_info.bin_slot_sizes_[bin] += offset_delta;
+  image_info.IncrementBinSlotSize(bin, offset_delta);
 
   BinSlot new_bin_slot(bin, current_offset);
   SetImageBinSlot(object, new_bin_slot);
 
-  ++image_info.bin_slot_count_[bin];
+  image_info.IncrementBinSlotCount(bin, 1u);
 
   // Grow the image closer to the end by the object we just assigned.
   image_info.image_end_ += offset_delta;
@@ -665,7 +666,7 @@
     BinSlot bin_slot(offset);
     size_t oat_index = GetOatIndex(object);
     const ImageInfo& image_info = GetImageInfo(oat_index);
-    DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()])
+    DCHECK_LT(bin_slot.GetIndex(), image_info.GetBinSlotSize(bin_slot.GetBin()))
         << "bin slot offset should not exceed the size of that bin";
   }
   return true;
@@ -682,7 +683,7 @@
   BinSlot bin_slot(static_cast<uint32_t>(offset));
   size_t oat_index = GetOatIndex(object);
   const ImageInfo& image_info = GetImageInfo(oat_index);
-  DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]);
+  DCHECK_LT(bin_slot.GetIndex(), image_info.GetBinSlotSize(bin_slot.GetBin()));
 
   return bin_slot;
 }
@@ -1049,8 +1050,7 @@
     const DexFile::MethodId& method_id = dex_file.GetMethodId(i);
     if (method_id.class_idx_ != last_class_idx) {
       last_class_idx = method_id.class_idx_;
-      last_class = class_linker->LookupResolvedType(
-          dex_file, last_class_idx, dex_cache, class_loader);
+      last_class = class_linker->LookupResolvedType(last_class_idx, dex_cache, class_loader);
       if (last_class != nullptr && !KeepClass(last_class)) {
         last_class = nullptr;
       }
@@ -1095,8 +1095,7 @@
     const DexFile::FieldId& field_id = dex_file.GetFieldId(i);
     if (field_id.class_idx_ != last_class_idx) {
       last_class_idx = field_id.class_idx_;
-      last_class = class_linker->LookupResolvedType(
-          dex_file, last_class_idx, dex_cache, class_loader);
+      last_class = class_linker->LookupResolvedType(last_class_idx, dex_cache, class_loader);
       if (last_class != nullptr && !KeepClass(last_class)) {
         last_class = nullptr;
       }
@@ -1129,7 +1128,7 @@
     uint32_t stored_index = pair.index;
     ObjPtr<mirror::Class> klass = pair.object.Read();
     if (klass == nullptr || i < stored_index) {
-      klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader);
+      klass = class_linker->LookupResolvedType(type_idx, dex_cache, class_loader);
       if (klass != nullptr) {
         DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass);
         stored_index = i;  // For correct clearing below if not keeping the `klass`.
@@ -1147,7 +1146,7 @@
     uint32_t stored_index = pair.index;
     ObjPtr<mirror::String> string = pair.object.Read();
     if (string == nullptr || i < stored_index) {
-      string = class_linker->LookupString(dex_file, string_idx, dex_cache);
+      string = class_linker->LookupString(string_idx, dex_cache);
       DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string);
     }
   }
@@ -1402,12 +1401,12 @@
           auto it = native_object_relocations_.find(cur_fields);
           CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields
                                                   << " already forwarded";
-          size_t& offset = image_info.bin_slot_sizes_[kBinArtField];
+          size_t offset = image_info.GetBinSlotSize(Bin::kArtField);
           DCHECK(!IsInBootImage(cur_fields));
           native_object_relocations_.emplace(
               cur_fields,
               NativeObjectRelocation {
-                  oat_index, offset, kNativeObjectRelocationTypeArtFieldArray
+                  oat_index, offset, NativeObjectRelocationType::kArtFieldArray
               });
           offset += header_size;
           // Forward individual fields so that we can quickly find where they belong.
@@ -1420,9 +1419,14 @@
             DCHECK(!IsInBootImage(field));
             native_object_relocations_.emplace(
                 field,
-                NativeObjectRelocation { oat_index, offset, kNativeObjectRelocationTypeArtField });
+                NativeObjectRelocation { oat_index,
+                                         offset,
+                                         NativeObjectRelocationType::kArtField });
             offset += sizeof(ArtField);
           }
+          image_info.IncrementBinSlotSize(
+              Bin::kArtField, header_size + cur_fields->size() * sizeof(ArtField));
+          DCHECK_EQ(offset, image_info.GetBinSlotSize(Bin::kArtField));
         }
       }
       // Visit and assign offsets for methods.
@@ -1436,8 +1440,8 @@
           }
         }
         NativeObjectRelocationType type = any_dirty
-            ? kNativeObjectRelocationTypeArtMethodDirty
-            : kNativeObjectRelocationTypeArtMethodClean;
+            ? NativeObjectRelocationType::kArtMethodDirty
+            : NativeObjectRelocationType::kArtMethodClean;
         Bin bin_type = BinTypeForNativeRelocationType(type);
         // Forward the entire array at once, but header first.
         const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
@@ -1449,15 +1453,15 @@
         auto it = native_object_relocations_.find(array);
         CHECK(it == native_object_relocations_.end())
             << "Method array " << array << " already forwarded";
-        size_t& offset = image_info.bin_slot_sizes_[bin_type];
+        size_t offset = image_info.GetBinSlotSize(bin_type);
         DCHECK(!IsInBootImage(array));
         native_object_relocations_.emplace(array,
             NativeObjectRelocation {
                 oat_index,
                 offset,
-                any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
-                          : kNativeObjectRelocationTypeArtMethodArrayClean });
-        offset += header_size;
+                any_dirty ? NativeObjectRelocationType::kArtMethodArrayDirty
+                          : NativeObjectRelocationType::kArtMethodArrayClean });
+        image_info.IncrementBinSlotSize(bin_type, header_size);
         for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
           AssignMethodOffset(&m, type, oat_index);
         }
@@ -1476,7 +1480,7 @@
             if (imt_method->IsRuntimeMethod() &&
                 !IsInBootImage(imt_method) &&
                 !NativeRelocationAssigned(imt_method)) {
-              AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+              AssignMethodOffset(imt_method, NativeObjectRelocationType::kRuntimeMethod, oat_index);
             }
           }
         }
@@ -1526,9 +1530,9 @@
       imt,
       NativeObjectRelocation {
           oat_index,
-          image_info.bin_slot_sizes_[kBinImTable],
-          kNativeObjectRelocationTypeIMTable});
-  image_info.bin_slot_sizes_[kBinImTable] += size;
+          image_info.GetBinSlotSize(Bin::kImTable),
+          NativeObjectRelocationType::kIMTable});
+  image_info.IncrementBinSlotSize(Bin::kImTable, size);
   return true;
 }
 
@@ -1545,9 +1549,9 @@
       table,
       NativeObjectRelocation {
           oat_index,
-          image_info.bin_slot_sizes_[kBinIMTConflictTable],
-          kNativeObjectRelocationTypeIMTConflictTable});
-  image_info.bin_slot_sizes_[kBinIMTConflictTable] += size;
+          image_info.GetBinSlotSize(Bin::kIMTConflictTable),
+          NativeObjectRelocationType::kIMTConflictTable});
+  image_info.IncrementBinSlotSize(Bin::kIMTConflictTable, size);
 }
 
 void ImageWriter::AssignMethodOffset(ArtMethod* method,
@@ -1560,9 +1564,10 @@
     TryAssignConflictTableOffset(method->GetImtConflictTable(target_ptr_size_), oat_index);
   }
   ImageInfo& image_info = GetImageInfo(oat_index);
-  size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)];
+  Bin bin_type = BinTypeForNativeRelocationType(type);
+  size_t offset = image_info.GetBinSlotSize(bin_type);
   native_object_relocations_.emplace(method, NativeObjectRelocation { oat_index, offset, type });
-  offset += ArtMethod::Size(target_ptr_size_);
+  image_info.IncrementBinSlotSize(bin_type, ArtMethod::Size(target_ptr_size_));
 }
 
 void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) {
@@ -1697,7 +1702,7 @@
     CHECK(m->IsRuntimeMethod());
     DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image";
     if (!IsInBootImage(m)) {
-      AssignMethodOffset(m, kNativeObjectRelocationTypeRuntimeMethod, GetDefaultOatIndex());
+      AssignMethodOffset(m, NativeObjectRelocationType::kRuntimeMethod, GetDefaultOatIndex());
     }
   }
 
@@ -1803,18 +1808,18 @@
   // Calculate bin slot offsets.
   for (ImageInfo& image_info : image_infos_) {
     size_t bin_offset = image_objects_offset_begin_;
-    for (size_t i = 0; i != kBinSize; ++i) {
-      switch (i) {
-        case kBinArtMethodClean:
-        case kBinArtMethodDirty: {
+    for (size_t i = 0; i != kNumberOfBins; ++i) {
+      switch (static_cast<Bin>(i)) {
+        case Bin::kArtMethodClean:
+        case Bin::kArtMethodDirty: {
           bin_offset = RoundUp(bin_offset, method_alignment);
           break;
         }
-        case kBinDexCacheArray:
+        case Bin::kDexCacheArray:
           bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment(target_ptr_size_));
           break;
-        case kBinImTable:
-        case kBinIMTConflictTable: {
+        case Bin::kImTable:
+        case Bin::kIMTConflictTable: {
           bin_offset = RoundUp(bin_offset, static_cast<size_t>(target_ptr_size_));
           break;
         }
@@ -1827,7 +1832,7 @@
     }
     // NOTE: There may be additional padding between the bin slots and the intern table.
     DCHECK_EQ(image_info.image_end_,
-              GetBinSizeSum(image_info, kBinMirrorCount) + image_objects_offset_begin_);
+              image_info.GetBinSizeSum(Bin::kMirrorCount) + image_objects_offset_begin_);
   }
 
   // Calculate image offsets.
@@ -1864,7 +1869,7 @@
     NativeObjectRelocation& relocation = pair.second;
     Bin bin_type = BinTypeForNativeRelocationType(relocation.type);
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
-    relocation.offset += image_info.bin_slot_offsets_[bin_type];
+    relocation.offset += image_info.GetBinSlotOffset(bin_type);
   }
 }
 
@@ -1881,33 +1886,32 @@
 
   // Add field section.
   ImageSection* field_section = &out_sections[ImageHeader::kSectionArtFields];
-  *field_section = ImageSection(bin_slot_offsets_[kBinArtField], bin_slot_sizes_[kBinArtField]);
-  CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
+  *field_section = ImageSection(GetBinSlotOffset(Bin::kArtField), GetBinSlotSize(Bin::kArtField));
 
   // Add method section.
   ImageSection* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
   *methods_section = ImageSection(
-      bin_slot_offsets_[kBinArtMethodClean],
-      bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]);
+      GetBinSlotOffset(Bin::kArtMethodClean),
+      GetBinSlotSize(Bin::kArtMethodClean) + GetBinSlotSize(Bin::kArtMethodDirty));
 
   // IMT section.
   ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables];
-  *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]);
+  *imt_section = ImageSection(GetBinSlotOffset(Bin::kImTable), GetBinSlotSize(Bin::kImTable));
 
   // Conflict tables section.
   ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables];
-  *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable],
-                                              bin_slot_sizes_[kBinIMTConflictTable]);
+  *imt_conflict_tables_section = ImageSection(GetBinSlotOffset(Bin::kIMTConflictTable),
+                                              GetBinSlotSize(Bin::kIMTConflictTable));
 
   // Runtime methods section.
   ImageSection* runtime_methods_section = &out_sections[ImageHeader::kSectionRuntimeMethods];
-  *runtime_methods_section = ImageSection(bin_slot_offsets_[kBinRuntimeMethod],
-                                          bin_slot_sizes_[kBinRuntimeMethod]);
+  *runtime_methods_section = ImageSection(GetBinSlotOffset(Bin::kRuntimeMethod),
+                                          GetBinSlotSize(Bin::kRuntimeMethod));
 
   // Add dex cache arrays section.
   ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
-  *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
-                                           bin_slot_sizes_[kBinDexCacheArray]);
+  *dex_cache_arrays_section = ImageSection(GetBinSlotOffset(Bin::kDexCacheArray),
+                                           GetBinSlotSize(Bin::kDexCacheArray));
   // For boot image, round up to the page boundary to separate the interned strings and
   // class table from the modifiable data. We shall mprotect() these pages read-only when
   // we load the boot image. This is more than sufficient for the string table alignment,
@@ -2060,16 +2064,16 @@
     DCHECK_GE(dest, image_info.image_->Begin() + image_info.image_end_);
     DCHECK(!IsInBootImage(pair.first));
     switch (relocation.type) {
-      case kNativeObjectRelocationTypeArtField: {
+      case NativeObjectRelocationType::kArtField: {
         memcpy(dest, pair.first, sizeof(ArtField));
         CopyReference(
             reinterpret_cast<ArtField*>(dest)->GetDeclaringClassAddressWithoutBarrier(),
             reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr());
         break;
       }
-      case kNativeObjectRelocationTypeRuntimeMethod:
-      case kNativeObjectRelocationTypeArtMethodClean:
-      case kNativeObjectRelocationTypeArtMethodDirty: {
+      case NativeObjectRelocationType::kRuntimeMethod:
+      case NativeObjectRelocationType::kArtMethodClean:
+      case NativeObjectRelocationType::kArtMethodDirty: {
         CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first),
                            reinterpret_cast<ArtMethod*>(dest),
                            image_info);
@@ -2077,12 +2081,12 @@
       }
       // For arrays, copy just the header since the elements will get copied by their corresponding
       // relocations.
-      case kNativeObjectRelocationTypeArtFieldArray: {
+      case NativeObjectRelocationType::kArtFieldArray: {
         memcpy(dest, pair.first, LengthPrefixedArray<ArtField>::ComputeSize(0));
         break;
       }
-      case kNativeObjectRelocationTypeArtMethodArrayClean:
-      case kNativeObjectRelocationTypeArtMethodArrayDirty: {
+      case NativeObjectRelocationType::kArtMethodArrayClean:
+      case NativeObjectRelocationType::kArtMethodArrayDirty: {
         size_t size = ArtMethod::Size(target_ptr_size_);
         size_t alignment = ArtMethod::Alignment(target_ptr_size_);
         memcpy(dest, pair.first, LengthPrefixedArray<ArtMethod>::ComputeSize(0, size, alignment));
@@ -2090,16 +2094,16 @@
         reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(dest)->ClearPadding(size, alignment);
         break;
       }
-      case kNativeObjectRelocationTypeDexCacheArray:
+      case NativeObjectRelocationType::kDexCacheArray:
         // Nothing to copy here, everything is done in FixupDexCache().
         break;
-      case kNativeObjectRelocationTypeIMTable: {
+      case NativeObjectRelocationType::kIMTable: {
         ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first);
         ImTable* dest_imt = reinterpret_cast<ImTable*>(dest);
         CopyAndFixupImTable(orig_imt, dest_imt);
         break;
       }
-      case kNativeObjectRelocationTypeIMTConflictTable: {
+      case NativeObjectRelocationType::kIMTConflictTable: {
         auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first);
         CopyAndFixupImtConflictTable(
             orig_table,
@@ -2197,7 +2201,7 @@
                      << method << " idx=" << i << "/" << num_elements << " with declaring class "
                      << Class::PrettyClass(method->GetDeclaringClass());
         } else {
-          CHECK_EQ(array_type, kBinArtField);
+          CHECK_EQ(array_type, Bin::kArtField);
           auto* field = reinterpret_cast<ArtField*>(elem);
           LOG(FATAL) << "No relocation entry for ArtField " << field->PrettyField() << " @ "
               << field << " idx=" << i << "/" << num_elements << " with declaring class "
@@ -2518,8 +2522,8 @@
   copy_dex_cache->SetDexFile(nullptr);
 }
 
-const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
-  DCHECK_LT(type, kOatAddressCount);
+const uint8_t* ImageWriter::GetOatAddress(StubType type) const {
+  DCHECK_LE(type, StubType::kLast);
   // If we are compiling an app image, we need to use the stubs of the boot image.
   if (compile_app_image_) {
     // Use the current image pointers.
@@ -2531,26 +2535,26 @@
     const OatHeader& header = oat_file->GetOatHeader();
     switch (type) {
       // TODO: We could maybe clean this up if we stored them in an array in the oat header.
-      case kOatAddressQuickGenericJNITrampoline:
+      case StubType::kQuickGenericJNITrampoline:
         return static_cast<const uint8_t*>(header.GetQuickGenericJniTrampoline());
-      case kOatAddressInterpreterToInterpreterBridge:
+      case StubType::kInterpreterToInterpreterBridge:
         return static_cast<const uint8_t*>(header.GetInterpreterToInterpreterBridge());
-      case kOatAddressInterpreterToCompiledCodeBridge:
+      case StubType::kInterpreterToCompiledCodeBridge:
         return static_cast<const uint8_t*>(header.GetInterpreterToCompiledCodeBridge());
-      case kOatAddressJNIDlsymLookup:
+      case StubType::kJNIDlsymLookup:
         return static_cast<const uint8_t*>(header.GetJniDlsymLookup());
-      case kOatAddressQuickIMTConflictTrampoline:
+      case StubType::kQuickIMTConflictTrampoline:
         return static_cast<const uint8_t*>(header.GetQuickImtConflictTrampoline());
-      case kOatAddressQuickResolutionTrampoline:
+      case StubType::kQuickResolutionTrampoline:
         return static_cast<const uint8_t*>(header.GetQuickResolutionTrampoline());
-      case kOatAddressQuickToInterpreterBridge:
+      case StubType::kQuickToInterpreterBridge:
         return static_cast<const uint8_t*>(header.GetQuickToInterpreterBridge());
       default:
         UNREACHABLE();
     }
   }
   const ImageInfo& primary_image_info = GetImageInfo(0);
-  return GetOatAddressForOffset(primary_image_info.oat_address_offsets_[type], primary_image_info);
+  return GetOatAddressForOffset(primary_image_info.GetStubOffset(type), primary_image_info);
 }
 
 const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method,
@@ -2586,16 +2590,16 @@
   } else if (quick_code == nullptr && method->IsNative() &&
       (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
     // Non-static or initialized native method missing compiled code, use generic JNI version.
-    quick_code = GetOatAddress(kOatAddressQuickGenericJNITrampoline);
+    quick_code = GetOatAddress(StubType::kQuickGenericJNITrampoline);
   } else if (quick_code == nullptr && !method->IsNative()) {
     // We don't have code at all for a non-native method, use the interpreter.
-    quick_code = GetOatAddress(kOatAddressQuickToInterpreterBridge);
+    quick_code = GetOatAddress(StubType::kQuickToInterpreterBridge);
     *quick_is_interpreted = true;
   } else {
     CHECK(!method->GetDeclaringClass()->IsInitialized());
     // We have code for a static method, but need to go through the resolution stub for class
     // initialization.
-    quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline);
+    quick_code = GetOatAddress(StubType::kQuickResolutionTrampoline);
   }
   if (!IsInBootOatFile(quick_code)) {
     // DCHECK_GE(quick_code, oat_data_begin_);
@@ -2630,11 +2634,11 @@
     if (orig_table != nullptr) {
       // Special IMT conflict method, normal IMT conflict method or unimplemented IMT method.
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
+          GetOatAddress(StubType::kQuickIMTConflictTrampoline), target_ptr_size_);
       copy->SetImtConflictTable(NativeLocationInImage(orig_table), target_ptr_size_);
     } else if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
+          GetOatAddress(StubType::kQuickResolutionTrampoline), target_ptr_size_);
     } else {
       bool found_one = false;
       for (size_t i = 0; i < static_cast<size_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
@@ -2653,7 +2657,7 @@
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(!orig->IsInvokable())) {
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_);
+          GetOatAddress(StubType::kQuickToInterpreterBridge), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
       const uint8_t* quick_code = GetQuickCode(orig, image_info, &quick_is_interpreted);
@@ -2664,17 +2668,17 @@
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
         copy->SetEntryPointFromJniPtrSize(
-            GetOatAddress(kOatAddressJNIDlsymLookup), target_ptr_size_);
+            GetOatAddress(StubType::kJNIDlsymLookup), target_ptr_size_);
       }
     }
   }
 }
 
-size_t ImageWriter::GetBinSizeSum(ImageWriter::ImageInfo& image_info, ImageWriter::Bin up_to) const {
-  DCHECK_LE(up_to, kBinSize);
-  return std::accumulate(&image_info.bin_slot_sizes_[0],
-                         &image_info.bin_slot_sizes_[up_to],
-                         /*init*/0);
+size_t ImageWriter::ImageInfo::GetBinSizeSum(Bin up_to) const {
+  DCHECK_LE(static_cast<size_t>(up_to), kNumberOfBins);
+  return std::accumulate(&bin_slot_sizes_[0],
+                         &bin_slot_sizes_[0] + static_cast<size_t>(up_to),
+                         /*init*/ static_cast<size_t>(0));
 }
 
 ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) {
@@ -2683,7 +2687,7 @@
   static_assert(kBinShift == 27, "wrong number of shift");
   static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes");
 
-  DCHECK_LT(GetBin(), kBinSize);
+  DCHECK_LT(GetBin(), Bin::kMirrorCount);
   DCHECK_ALIGNED(GetIndex(), kObjectAlignment);
 }
 
@@ -2702,23 +2706,23 @@
 
 ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) {
   switch (type) {
-    case kNativeObjectRelocationTypeArtField:
-    case kNativeObjectRelocationTypeArtFieldArray:
-      return kBinArtField;
-    case kNativeObjectRelocationTypeArtMethodClean:
-    case kNativeObjectRelocationTypeArtMethodArrayClean:
-      return kBinArtMethodClean;
-    case kNativeObjectRelocationTypeArtMethodDirty:
-    case kNativeObjectRelocationTypeArtMethodArrayDirty:
-      return kBinArtMethodDirty;
-    case kNativeObjectRelocationTypeDexCacheArray:
-      return kBinDexCacheArray;
-    case kNativeObjectRelocationTypeRuntimeMethod:
-      return kBinRuntimeMethod;
-    case kNativeObjectRelocationTypeIMTable:
-      return kBinImTable;
-    case kNativeObjectRelocationTypeIMTConflictTable:
-      return kBinIMTConflictTable;
+    case NativeObjectRelocationType::kArtField:
+    case NativeObjectRelocationType::kArtFieldArray:
+      return Bin::kArtField;
+    case NativeObjectRelocationType::kArtMethodClean:
+    case NativeObjectRelocationType::kArtMethodArrayClean:
+      return Bin::kArtMethodClean;
+    case NativeObjectRelocationType::kArtMethodDirty:
+    case NativeObjectRelocationType::kArtMethodArrayDirty:
+      return Bin::kArtMethodDirty;
+    case NativeObjectRelocationType::kDexCacheArray:
+      return Bin::kDexCacheArray;
+    case NativeObjectRelocationType::kRuntimeMethod:
+      return Bin::kRuntimeMethod;
+    case NativeObjectRelocationType::kIMTable:
+      return Bin::kImTable;
+    case NativeObjectRelocationType::kIMTConflictTable:
+      return Bin::kIMTConflictTable;
   }
   UNREACHABLE();
 }
@@ -2782,20 +2786,20 @@
 
   if (oat_index == GetDefaultOatIndex()) {
     // Primary oat file, read the trampolines.
-    cur_image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
-        oat_header.GetInterpreterToInterpreterBridgeOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
-        oat_header.GetInterpreterToCompiledCodeBridgeOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] =
-        oat_header.GetJniDlsymLookupOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
-        oat_header.GetQuickGenericJniTrampolineOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
-        oat_header.GetQuickImtConflictTrampolineOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
-        oat_header.GetQuickResolutionTrampolineOffset();
-    cur_image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
-        oat_header.GetQuickToInterpreterBridgeOffset();
+    cur_image_info.SetStubOffset(StubType::kInterpreterToInterpreterBridge,
+                                 oat_header.GetInterpreterToInterpreterBridgeOffset());
+    cur_image_info.SetStubOffset(StubType::kInterpreterToCompiledCodeBridge,
+                                 oat_header.GetInterpreterToCompiledCodeBridgeOffset());
+    cur_image_info.SetStubOffset(StubType::kJNIDlsymLookup,
+                                 oat_header.GetJniDlsymLookupOffset());
+    cur_image_info.SetStubOffset(StubType::kQuickGenericJNITrampoline,
+                                 oat_header.GetQuickGenericJniTrampolineOffset());
+    cur_image_info.SetStubOffset(StubType::kQuickIMTConflictTrampoline,
+                                 oat_header.GetQuickImtConflictTrampolineOffset());
+    cur_image_info.SetStubOffset(StubType::kQuickResolutionTrampoline,
+                                 oat_header.GetQuickResolutionTrampolineOffset());
+    cur_image_info.SetStubOffset(StubType::kQuickToInterpreterBridge,
+                                 oat_header.GetQuickToInterpreterBridgeOffset());
   }
 }
 
diff --git a/dex2oat/linker/image_writer.h b/dex2oat/linker/image_writer.h
index 68c7b59..3aceceb 100644
--- a/dex2oat/linker/image_writer.h
+++ b/dex2oat/linker/image_writer.h
@@ -161,70 +161,70 @@
 
   // Classify different kinds of bins that objects end up getting packed into during image writing.
   // Ordered from dirtiest to cleanest (until ArtMethods).
-  enum Bin {
-    kBinKnownDirty,               // Known dirty objects from --dirty-image-objects list
-    kBinMiscDirty,                // Dex caches, object locks, etc...
-    kBinClassVerified,            // Class verified, but initializers haven't been run
+  enum class Bin {
+    kKnownDirty,                  // Known dirty objects from --dirty-image-objects list
+    kMiscDirty,                   // Dex caches, object locks, etc...
+    kClassVerified,               // Class verified, but initializers haven't been run
     // Unknown mix of clean/dirty:
-    kBinRegular,
-    kBinClassInitialized,         // Class initializers have been run
+    kRegular,
+    kClassInitialized,            // Class initializers have been run
     // All classes get their own bins since their fields often dirty
-    kBinClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
+    kClassInitializedFinalStatics,  // Class initializers have been run, no non-final statics
     // Likely-clean:
-    kBinString,                        // [String] Almost always immutable (except for obj header).
+    kString,                      // [String] Almost always immutable (except for obj header).
     // Add more bins here if we add more segregation code.
     // Non mirror fields must be below.
     // ArtFields should be always clean.
-    kBinArtField,
+    kArtField,
     // If the class is initialized, then the ArtMethods are probably clean.
-    kBinArtMethodClean,
+    kArtMethodClean,
     // ArtMethods may be dirty if the class has native methods or a declaring class that isn't
     // initialized.
-    kBinArtMethodDirty,
+    kArtMethodDirty,
     // IMT (clean)
-    kBinImTable,
+    kImTable,
     // Conflict tables (clean).
-    kBinIMTConflictTable,
+    kIMTConflictTable,
     // Runtime methods (always clean, do not have a length prefix array).
-    kBinRuntimeMethod,
+    kRuntimeMethod,
     // Dex cache arrays have a special slot for PC-relative addressing. Since they are
     // huge, and as such their dirtiness is not important for the clean/dirty separation,
     // we arbitrarily keep them at the end of the native data.
-    kBinDexCacheArray,            // Arrays belonging to dex cache.
-    kBinSize,
+    kDexCacheArray,               // Arrays belonging to dex cache.
+    kLast = kDexCacheArray,
     // Number of bins which are for mirror objects.
-    kBinMirrorCount = kBinArtField,
+    kMirrorCount = kArtField,
   };
   friend std::ostream& operator<<(std::ostream& stream, const Bin& bin);
 
-  enum NativeObjectRelocationType {
-    kNativeObjectRelocationTypeArtField,
-    kNativeObjectRelocationTypeArtFieldArray,
-    kNativeObjectRelocationTypeArtMethodClean,
-    kNativeObjectRelocationTypeArtMethodArrayClean,
-    kNativeObjectRelocationTypeArtMethodDirty,
-    kNativeObjectRelocationTypeArtMethodArrayDirty,
-    kNativeObjectRelocationTypeRuntimeMethod,
-    kNativeObjectRelocationTypeIMTable,
-    kNativeObjectRelocationTypeIMTConflictTable,
-    kNativeObjectRelocationTypeDexCacheArray,
+  enum class NativeObjectRelocationType {
+    kArtField,
+    kArtFieldArray,
+    kArtMethodClean,
+    kArtMethodArrayClean,
+    kArtMethodDirty,
+    kArtMethodArrayDirty,
+    kRuntimeMethod,
+    kIMTable,
+    kIMTConflictTable,
+    kDexCacheArray,
   };
   friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type);
 
-  enum OatAddress {
-    kOatAddressInterpreterToInterpreterBridge,
-    kOatAddressInterpreterToCompiledCodeBridge,
-    kOatAddressJNIDlsymLookup,
-    kOatAddressQuickGenericJNITrampoline,
-    kOatAddressQuickIMTConflictTrampoline,
-    kOatAddressQuickResolutionTrampoline,
-    kOatAddressQuickToInterpreterBridge,
-    // Number of elements in the enum.
-    kOatAddressCount,
+  enum class StubType {
+    kInterpreterToInterpreterBridge,
+    kInterpreterToCompiledCodeBridge,
+    kJNIDlsymLookup,
+    kQuickGenericJNITrampoline,
+    kQuickIMTConflictTrampoline,
+    kQuickResolutionTrampoline,
+    kQuickToInterpreterBridge,
+    kLast = kQuickToInterpreterBridge,
   };
-  friend std::ostream& operator<<(std::ostream& stream, const OatAddress& oat_address);
+  friend std::ostream& operator<<(std::ostream& stream, const StubType& stub_type);
 
-  static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1);
+  static constexpr size_t kBinBits =
+      MinimumBitsToStore<uint32_t>(static_cast<size_t>(Bin::kMirrorCount) - 1);
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
   // failures due to invalid read barrier bits during object field reads.
@@ -232,6 +232,12 @@
   // 111000.....0
   static const size_t kBinMask = ((static_cast<size_t>(1) << kBinBits) - 1) << kBinShift;
 
+  // Number of bins, including non-mirror bins.
+  static constexpr size_t kNumberOfBins = static_cast<size_t>(Bin::kLast) + 1u;
+
+  // Number of stub types.
+  static constexpr size_t kNumberOfStubTypes = static_cast<size_t>(StubType::kLast) + 1u;
+
   // We use the lock word to store the bin # and bin index of the object in the image.
   //
   // The struct size must be exactly sizeof(LockWord), currently 32-bits, since this will end up
@@ -262,6 +268,39 @@
     // excluding the bitmap.
     size_t CreateImageSections(ImageSection* out_sections, bool app_image) const;
 
+    size_t GetStubOffset(StubType stub_type) const {
+      DCHECK_LT(static_cast<size_t>(stub_type), kNumberOfStubTypes);
+      return stub_offsets_[static_cast<size_t>(stub_type)];
+    }
+
+    void SetStubOffset(StubType stub_type, size_t offset) {
+      DCHECK_LT(static_cast<size_t>(stub_type), kNumberOfStubTypes);
+      stub_offsets_[static_cast<size_t>(stub_type)] = offset;
+    }
+
+    size_t GetBinSlotOffset(Bin bin) const {
+      DCHECK_LT(static_cast<size_t>(bin), kNumberOfBins);
+      return bin_slot_offsets_[static_cast<size_t>(bin)];
+    }
+
+    void IncrementBinSlotSize(Bin bin, size_t size_to_add) {
+      DCHECK_LT(static_cast<size_t>(bin), kNumberOfBins);
+      bin_slot_sizes_[static_cast<size_t>(bin)] += size_to_add;
+    }
+
+    size_t GetBinSlotSize(Bin bin) const {
+      DCHECK_LT(static_cast<size_t>(bin), kNumberOfBins);
+      return bin_slot_sizes_[static_cast<size_t>(bin)];
+    }
+
+    void IncrementBinSlotCount(Bin bin, size_t count_to_add) {
+      DCHECK_LT(static_cast<size_t>(bin), kNumberOfBins);
+      bin_slot_count_[static_cast<size_t>(bin)] += count_to_add;
+    }
+
+    // Calculate the sum total of the bin slot sizes in [0, up_to).
+    size_t GetBinSizeSum(Bin up_to) const;
+
     std::unique_ptr<MemMap> image_;  // Memory mapped for generating the image.
 
     // Target begin of this image. Notes: It is not valid to write here, this is the address
@@ -300,12 +339,12 @@
     SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
 
     // Offset from oat_data_begin_ to the stubs.
-    uint32_t oat_address_offsets_[kOatAddressCount] = {};
+    uint32_t stub_offsets_[kNumberOfStubTypes] = {};
 
     // Bin slot tracking for dirty object packing.
-    size_t bin_slot_sizes_[kBinSize] = {};  // Number of bytes in a bin.
-    size_t bin_slot_offsets_[kBinSize] = {};  // Number of bytes in previous bins.
-    size_t bin_slot_count_[kBinSize] = {};  // Number of objects in a bin.
+    size_t bin_slot_sizes_[kNumberOfBins] = {};  // Number of bytes in a bin.
+    size_t bin_slot_offsets_[kNumberOfBins] = {};  // Number of bytes in previous bins.
+    size_t bin_slot_count_[kNumberOfBins] = {};  // Number of objects in a bin.
 
     // Cached size of the intern table for when we allocate memory.
     size_t intern_table_bytes_ = 0;
@@ -367,7 +406,7 @@
   }
 
   // Returns the address in the boot image if we are compiling the app image.
-  const uint8_t* GetOatAddress(OatAddress type) const;
+  const uint8_t* GetOatAddress(StubType type) const;
 
   const uint8_t* GetOatAddressForOffset(uint32_t offset, const ImageInfo& image_info) const {
     // With Quick, code is within the OatFile, as there are all in one
@@ -443,9 +482,6 @@
                               bool* quick_is_interpreted)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins.
-  size_t GetBinSizeSum(ImageInfo& image_info, Bin up_to = kBinSize) const;
-
   // Return true if a method is likely to be dirtied at runtime.
   bool WillMethodBeDirty(ArtMethod* m) const REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -572,9 +608,9 @@
     NativeObjectRelocationType type;
 
     bool IsArtMethodRelocation() const {
-      return type == kNativeObjectRelocationTypeArtMethodClean ||
-          type == kNativeObjectRelocationTypeArtMethodDirty ||
-          type == kNativeObjectRelocationTypeRuntimeMethod;
+      return type == NativeObjectRelocationType::kArtMethodClean ||
+          type == NativeObjectRelocationType::kArtMethodDirty ||
+          type == NativeObjectRelocationType::kRuntimeMethod;
     }
   };
   std::unordered_map<void*, NativeObjectRelocation> native_object_relocations_;
diff --git a/dex2oat/linker/index_bss_mapping_encoder.h b/dex2oat/linker/index_bss_mapping_encoder.h
index 9bc1432..c6326ed 100644
--- a/dex2oat/linker/index_bss_mapping_encoder.h
+++ b/dex2oat/linker/index_bss_mapping_encoder.h
@@ -17,9 +17,10 @@
 #ifndef ART_DEX2OAT_LINKER_INDEX_BSS_MAPPING_ENCODER_H_
 #define ART_DEX2OAT_LINKER_INDEX_BSS_MAPPING_ENCODER_H_
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
 #include "base/bit_vector-inl.h"
-#include "base/logging.h"
 #include "index_bss_mapping.h"
 
 namespace art {
diff --git a/dex2oat/linker/multi_oat_relative_patcher.cc b/dex2oat/linker/multi_oat_relative_patcher.cc
index 178a78f..1abaf7d 100644
--- a/dex2oat/linker/multi_oat_relative_patcher.cc
+++ b/dex2oat/linker/multi_oat_relative_patcher.cc
@@ -16,8 +16,9 @@
 
 #include "multi_oat_relative_patcher.h"
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "globals.h"
 
 namespace art {
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 3e7a7cd..d4fc59c 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -26,6 +26,7 @@
 #include "base/bit_vector-inl.h"
 #include "base/enums.h"
 #include "base/file_magic.h"
+#include "base/logging.h"  // For VLOG
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -274,7 +275,9 @@
  public:
   OatDexFile(const char* dex_file_location,
              DexFileSource source,
-             CreateTypeLookupTable create_type_lookup_table);
+             CreateTypeLookupTable create_type_lookup_table,
+             uint32_t dex_file_location_checksum,
+             size_t dex_file_size);
   OatDexFile(OatDexFile&& src) = default;
 
   const char* GetLocation() const {
@@ -295,31 +298,47 @@
   // Whether to create the type lookup table.
   CreateTypeLookupTable create_type_lookup_table_;
 
-  // Dex file size. Initialized when writing the dex file.
+  // Dex file size. Passed in the constructor, but could be
+  // overwritten by LayoutAndWriteDexFile.
   size_t dex_file_size_;
 
   // Offset of start of OatDexFile from beginning of OatHeader. It is
   // used to validate file position when writing.
   size_t offset_;
 
-  // Data to write.
-  uint32_t dex_file_location_size_;
-  const char* dex_file_location_data_;
-  uint32_t dex_file_location_checksum_;
+  ///// Start of data to write to vdex/oat file.
+
+  const uint32_t dex_file_location_size_;
+  const char* const dex_file_location_data_;
+
+  // The checksum of the dex file.
+  const uint32_t dex_file_location_checksum_;
+
+  // Offset of the dex file in the vdex file. Set when writing dex files in
+  // SeekToDexFile.
   uint32_t dex_file_offset_;
-  uint32_t class_offsets_offset_;
+
+  // The lookup table offset in the oat file. Set in WriteTypeLookupTables.
   uint32_t lookup_table_offset_;
+
+  // Class and BSS offsets set in PrepareLayout.
+  uint32_t class_offsets_offset_;
   uint32_t method_bss_mapping_offset_;
   uint32_t type_bss_mapping_offset_;
   uint32_t string_bss_mapping_offset_;
+
+  // Offset of dex sections that will have different runtime madvise states.
+  // Set in WriteDexLayoutSections.
   uint32_t dex_sections_layout_offset_;
 
-  // Data to write to a separate section.
+  // Data to write to a separate section. We set the length
+  // of the vector in OpenDexFiles.
   dchecked_vector<uint32_t> class_offsets_;
 
   // Dex section layout info to serialize.
   DexLayoutSections dex_sections_layout_;
 
+  ///// End of data to write to vdex/oat file.
  private:
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
@@ -417,6 +436,41 @@
     compact_dex_level_(compact_dex_level) {
 }
 
+// Checks a raw dex header's magic, version and file size; logs an error on failure.
+static bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
+  if (!DexFileLoader::IsMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
+    return false;
+  }
+  if (!DexFileLoader::IsVersionAndMagicValid(raw_header)) {
+    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
+    return false;
+  }
+  // The size recorded in the header must at least cover the header itself.
+  if (AsUnalignedDexFileHeader(raw_header)->file_size_ < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
+               << " File: " << location;
+    return false;
+  }
+  return true;
+}
+
+// Reads sizeof(DexFile::Header) bytes from `file` into `raw_header` and validates them.
+// Returns AsUnalignedDexFileHeader(raw_header), or nullptr (after logging) on failure.
+static const UnalignedDexFileHeader* GetDexFileHeader(File* file,
+                                                      uint8_t* raw_header,
+                                                      const char* location) {
+  if (!file->ReadFully(raw_header, sizeof(DexFile::Header))) {
+    PLOG(ERROR) << "Failed to read dex file header."
+                << " File: " << location << " Output: " << file->GetPath();
+    return nullptr;
+  }
+  if (!ValidateDexFileHeader(raw_header, location)) {
+    return nullptr;
+  }
+  return AsUnalignedDexFileHeader(raw_header);
+}
+
 bool OatWriter::AddDexFileSource(const char* filename,
                                  const char* location,
                                  CreateTypeLookupTable create_type_lookup_table) {
@@ -428,12 +482,20 @@
     PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
     return false;
   } else if (DexFileLoader::IsMagicValid(magic)) {
+    uint8_t raw_header[sizeof(DexFile::Header)];
+    const UnalignedDexFileHeader* header = GetDexFileHeader(&fd, raw_header, location);
+    if (header == nullptr) {
+      return false;
+    }
     // The file is open for reading, not writing, so it's OK to let the File destructor
     // close it without checking for explicit Close(), so pass checkUsage = false.
     raw_dex_files_.emplace_back(new File(fd.Release(), location, /* checkUsage */ false));
-    oat_dex_files_.emplace_back(location,
-                                DexFileSource(raw_dex_files_.back().get()),
-                                create_type_lookup_table);
+    oat_dex_files_.emplace_back(/* OatDexFile */
+        location,
+        DexFileSource(raw_dex_files_.back().get()),
+        create_type_lookup_table,
+        header->checksum_,
+        header->file_size_);
   } else if (IsZipMagic(magic)) {
     if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
       return false;
@@ -467,9 +529,13 @@
     zipped_dex_files_.push_back(std::move(entry));
     zipped_dex_file_locations_.push_back(DexFileLoader::GetMultiDexLocation(i, location));
     const char* full_location = zipped_dex_file_locations_.back().c_str();
-    oat_dex_files_.emplace_back(full_location,
-                                DexFileSource(zipped_dex_files_.back().get()),
-                                create_type_lookup_table);
+    // We override the checksum from header with the CRC from ZIP entry.
+    oat_dex_files_.emplace_back(/* OatDexFile */
+        full_location,
+        DexFileSource(zipped_dex_files_.back().get()),
+        create_type_lookup_table,
+        zipped_dex_files_.back()->GetCrc32(),
+        zipped_dex_files_.back()->GetUncompressedLength());
   }
   if (zipped_dex_file_locations_.empty()) {
     LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
@@ -498,10 +564,13 @@
     // We used `zipped_dex_file_locations_` to keep the strings in memory.
     zipped_dex_file_locations_.push_back(DexFileLoader::GetMultiDexLocation(i, location));
     const char* full_location = zipped_dex_file_locations_.back().c_str();
-    oat_dex_files_.emplace_back(full_location,
-                                DexFileSource(current_dex_data),
-                                create_type_lookup_table);
-    oat_dex_files_.back().dex_file_location_checksum_ = vdex_file.GetLocationChecksum(i);
+    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(current_dex_data);
+    oat_dex_files_.emplace_back(/* OatDexFile */
+        full_location,
+        DexFileSource(current_dex_data),
+        create_type_lookup_table,
+        vdex_file.GetLocationChecksum(i),
+        header->file_size_);
   }
 
   if (vdex_file.GetNextDexFileData(current_dex_data) != nullptr) {
@@ -537,8 +606,12 @@
     return false;
   }
 
-  oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table);
-  oat_dex_files_.back().dex_file_location_checksum_ = location_checksum;
+  oat_dex_files_.emplace_back(/* OatDexFile */
+      location,
+      DexFileSource(data.data()),
+      create_type_lookup_table,
+      location_checksum,
+      header->file_size_);
   return true;
 }
 
@@ -1519,11 +1592,10 @@
       ScopedObjectAccessUnchecked soa(self);
       StackHandleScope<1> hs(self);
       method = class_linker_->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-          *dex_file_,
           it.GetMemberIndex(),
           hs.NewHandle(dex_cache),
           ScopedNullHandle<mirror::ClassLoader>(),
-          nullptr,
+          /* referrer */ nullptr,
           invoke_type);
       if (method == nullptr) {
         LOG(FATAL_WITHOUT_ABORT) << "Unexpected failure to resolve a method: "
@@ -1879,20 +1951,21 @@
         : class_linker_->FindDexCache(Thread::Current(), *target_dex_file);
   }
 
-  mirror::Class* GetTargetType(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> GetTargetType(const LinkerPatch& patch)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(writer_->HasImage());
     ObjPtr<mirror::DexCache> dex_cache = GetDexCache(patch.TargetTypeDexFile());
     ObjPtr<mirror::Class> type =
-        ClassLinker::LookupResolvedType(patch.TargetTypeIndex(), dex_cache, class_loader_);
+        class_linker_->LookupResolvedType(patch.TargetTypeIndex(), dex_cache, class_loader_);
     CHECK(type != nullptr);
-    return type.Ptr();
+    return type;
   }
 
-  mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::String> GetTargetString(const LinkerPatch& patch)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
-                                                  patch.TargetStringIndex(),
-                                                  GetDexCache(patch.TargetStringDexFile()));
+    ObjPtr<mirror::String> string =
+        linker->LookupString(patch.TargetStringIndex(), GetDexCache(patch.TargetStringDexFile()));
     DCHECK(string != nullptr);
     DCHECK(writer_->HasBootImage() ||
            Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
@@ -1908,13 +1981,14 @@
     return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin);
   }
 
-  uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t GetTargetObjectOffset(ObjPtr<mirror::Object> object)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(writer_->HasBootImage());
-    object = writer_->image_writer_->GetImageAddress(object);
+    object = writer_->image_writer_->GetImageAddress(object.Ptr());
     size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
     uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
     // TODO: Clean up offset types. The target offset must be treated as signed.
-    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(object) - oat_data_begin);
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(object.Ptr()) - oat_data_begin);
   }
 
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
@@ -2622,7 +2696,8 @@
           CompiledMethod* compiled_method =
               driver.GetCompiledMethod(MethodReference(dex_file, method_idx));
           const DexFile::CodeItem* code_item = class_it.GetMethodCodeItem();
-          uint32_t existing_debug_info_offset = OatFile::GetDebugInfoOffset(*dex_file, code_item);
+          CodeItemDebugInfoAccessor accessor(dex_file, code_item);
+          const uint32_t existing_debug_info_offset = accessor.DebugInfoOffset();
           // If the existing offset is already out of bounds (and not magic marker 0xFFFFFFFF)
           // we will pretend the method has been quickened.
           bool existing_offset_out_of_bounds =
@@ -2670,10 +2745,6 @@
   size_t initial_offset = vdex_size_;
   size_t start_offset = RoundUp(initial_offset, 4u);
 
-  vdex_size_ = start_offset;
-  vdex_quickening_info_offset_ = vdex_size_;
-  size_quickening_info_alignment_ = start_offset - initial_offset;
-
   off_t actual_offset = vdex_out->Seek(start_offset, kSeekSet);
   if (actual_offset != static_cast<off_t>(start_offset)) {
     PLOG(ERROR) << "Failed to seek to quickening info section. Actual: " << actual_offset
@@ -2717,7 +2788,16 @@
     size_quickening_info_ = 0;
   }
 
-  vdex_size_ += size_quickening_info_;
+  if (size_quickening_info_ == 0) {
+    // Nothing was written. Leave `vdex_size_` untouched and unaligned.
+    vdex_quickening_info_offset_ = initial_offset;
+    size_quickening_info_alignment_ = 0;
+  } else {
+    vdex_size_ = start_offset + size_quickening_info_;
+    vdex_quickening_info_offset_ = start_offset;
+    size_quickening_info_alignment_ = start_offset - initial_offset;
+  }
+
   return true;
 }
 
@@ -3210,44 +3290,6 @@
   return true;
 }
 
-bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) {
-  // Read the dex file header and perform minimal verification.
-  uint8_t raw_header[sizeof(DexFile::Header)];
-  if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) {
-    PLOG(ERROR) << "Failed to read dex file header. Actual: "
-                << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
-    return false;
-  }
-  if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) {
-    return false;
-  }
-
-  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
-  oat_dex_file->dex_file_size_ = header->file_size_;
-  oat_dex_file->dex_file_location_checksum_ = header->checksum_;
-  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
-  return true;
-}
-
-bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) {
-  const bool valid_standard_dex_magic = DexFileLoader::IsMagicValid(raw_header);
-  if (!valid_standard_dex_magic) {
-    LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location;
-    return false;
-  }
-  if (!DexFileLoader::IsVersionAndMagicValid(raw_header)) {
-    LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location;
-    return false;
-  }
-  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header);
-  if (header->file_size_ < sizeof(DexFile::Header)) {
-    LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header."
-               << " File: " << location;
-    return false;
-  }
-  return true;
-}
-
 bool OatWriter::WriteDexFiles(OutputStream* out, File* file, bool update_input_vdex) {
   TimingLogger::ScopedTiming split("Write Dex files", timings_);
 
@@ -3402,12 +3444,15 @@
   DexLayout dex_layout(options, profile_compilation_info_, nullptr);
   dex_layout.ProcessDexFile(location.c_str(), dex_file.get(), 0);
   std::unique_ptr<MemMap> mem_map(dex_layout.GetAndReleaseMemMap());
+  oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
+  // Dex layout can affect the size of the dex file, so we update here what we have set
+  // when adding the dex file as a source.
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(mem_map->Begin());
+  oat_dex_file->dex_file_size_ = header->file_size_;
   if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) {
     return false;
   }
-  oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
-  // Set the checksum of the new oat dex file to be the original file's checksum.
-  oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum();
+  CHECK_EQ(oat_dex_file->dex_file_location_checksum_, dex_file->GetLocationChecksum());
   return true;
 }
 
@@ -3457,9 +3502,6 @@
                 << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
     return false;
   }
-  if (!ReadDexFileHeader(file, oat_dex_file)) {
-    return false;
-  }
   if (extracted_size < oat_dex_file->dex_file_size_) {
     LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size
                << " file size from header: " << oat_dex_file->dex_file_size_
@@ -3467,9 +3509,6 @@
     return false;
   }
 
-  // Override the checksum from header with the CRC from ZIP entry.
-  oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32();
-
   // Seek both file and stream to the end offset.
   size_t end_offset = start_offset + oat_dex_file->dex_file_size_;
   actual_offset = lseek(file->Fd(), end_offset, SEEK_SET);
@@ -3518,9 +3557,6 @@
                 << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath();
     return false;
   }
-  if (!ReadDexFileHeader(dex_file, oat_dex_file)) {
-    return false;
-  }
 
   // Copy the input dex file using sendfile().
   if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) {
@@ -3582,12 +3618,6 @@
       return false;
     }
   }
-
-  // Update dex file size and resize class offsets in the OatDexFile.
-  // Note: For raw data, the checksum is passed directly to AddRawDexFileSource().
-  // Note: For vdex, the checksum is copied from the existing vdex file.
-  oat_dex_file->dex_file_size_ = header->file_size_;
-  oat_dex_file->class_offsets_.resize(header->class_defs_size_);
   return true;
 }
 
@@ -3623,29 +3653,22 @@
   }
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    // Make sure no one messed with input files while we were copying data.
-    // At the very least we need consistent file size and number of class definitions.
     const uint8_t* raw_dex_file =
         dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset;
-    if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) {
-      // Note: ValidateDexFileHeader() already logged an error message.
-      LOG(ERROR) << "Failed to verify written dex file header!"
+
+    if (kIsDebugBuild) {
+      // Sanity check our input files.
+      // Note that ValidateDexFileHeader() logs error messages.
+      CHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation()))
+          << "Failed to verify written dex file header!"
           << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset
           << " ~ " << static_cast<const void*>(raw_dex_file);
-      return false;
-    }
-    const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
-    if (header->file_size_ != oat_dex_file.dex_file_size_) {
-      LOG(ERROR) << "File size mismatch in written dex file header! Expected: "
+
+      const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+      CHECK_EQ(header->file_size_, oat_dex_file.dex_file_size_)
+          << "File size mismatch in written dex file header! Expected: "
           << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_
           << " Output: " << file->GetPath();
-      return false;
-    }
-    if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) {
-      LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: "
-          << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_
-          << " Output: " << file->GetPath();
-      return false;
     }
 
     // Now, open the dex file.
@@ -3662,6 +3685,10 @@
                  << " Error: " << error_msg;
       return false;
     }
+
+    // Set the class_offsets size now that we have easy access to the DexFile and
+    // it has been verified in DexFileLoader::Open.
+    oat_dex_file.class_offsets_.resize(dex_files.back()->GetHeader().class_defs_size_);
   }
 
   *opened_dex_files_map = std::move(dex_files_map);
@@ -3845,6 +3872,7 @@
 
   DCHECK_NE(vdex_dex_files_offset_, 0u);
   DCHECK_NE(vdex_verifier_deps_offset_, 0u);
+  DCHECK_NE(vdex_quickening_info_offset_, 0u);
 
   size_t dex_section_size = vdex_verifier_deps_offset_ - vdex_dex_files_offset_;
   size_t verifier_deps_section_size = vdex_quickening_info_offset_ - vdex_verifier_deps_offset_;
@@ -3899,17 +3927,19 @@
 
 OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
                                   DexFileSource source,
-                                  CreateTypeLookupTable create_type_lookup_table)
+                                  CreateTypeLookupTable create_type_lookup_table,
+                                  uint32_t dex_file_location_checksum,
+                                  size_t dex_file_size)
     : source_(source),
       create_type_lookup_table_(create_type_lookup_table),
-      dex_file_size_(0),
+      dex_file_size_(dex_file_size),
       offset_(0),
       dex_file_location_size_(strlen(dex_file_location)),
       dex_file_location_data_(dex_file_location),
-      dex_file_location_checksum_(0u),
+      dex_file_location_checksum_(dex_file_location_checksum),
       dex_file_offset_(0u),
-      class_offsets_offset_(0u),
       lookup_table_offset_(0u),
+      class_offsets_offset_(0u),
       method_bss_mapping_offset_(0u),
       type_bss_mapping_offset_(0u),
       string_bss_mapping_offset_(0u),
diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h
index e0cb7ec..4055878 100644
--- a/dex2oat/linker/oat_writer.h
+++ b/dex2oat/linker/oat_writer.h
@@ -325,8 +325,6 @@
   size_t WriteCodeDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset);
 
   bool RecordOatDataOffset(OutputStream* out);
-  bool ReadDexFileHeader(File* oat_file, OatDexFile* oat_dex_file);
-  bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
   bool WriteTypeLookupTables(OutputStream* oat_rodata,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteDexLayoutSections(OutputStream* oat_rodata,
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 7509d91..e9958b1 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -45,6 +45,7 @@
 #include "oat_writer.h"
 #include "scoped_thread_state_change-inl.h"
 #include "utils/test_dex_file_builder.h"
+#include "vdex_file.h"
 
 namespace art {
 namespace linker {
@@ -102,7 +103,6 @@
     callbacks_.reset(new QuickCompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp));
     callbacks_->SetVerificationResults(verification_results_.get());
     Runtime::Current()->SetCompilerCallbacks(callbacks_.get());
-    timer_.reset(new CumulativeLogger("Compilation times"));
     compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
                                               verification_results_.get(),
                                               compiler_kind,
@@ -112,9 +112,6 @@
                                               /* compiled_classes */ nullptr,
                                               /* compiled_methods */ nullptr,
                                               /* thread_count */ 2,
-                                              /* dump_stats */ true,
-                                              /* dump_passes */ true,
-                                              timer_.get(),
                                               /* swap_fd */ -1,
                                               /* profile_compilation_info */ nullptr));
   }
@@ -229,6 +226,9 @@
     if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) {
       return false;
     }
+    if (!oat_writer.WriteQuickeningInfo(vdex_out.get())) {
+      return false;
+    }
     if (!oat_writer.WriteChecksumsAndVdexHeader(vdex_out.get())) {
       return false;
     }
@@ -640,6 +640,11 @@
   std::unique_ptr<const DexFile> opened_dex_file2 =
       opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
 
+  ASSERT_EQ(opened_oat_file->GetOatDexFiles()[0]->GetDexFileLocationChecksum(),
+            dex_file1_data->GetHeader().checksum_);
+  ASSERT_EQ(opened_oat_file->GetOatDexFiles()[1]->GetDexFileLocationChecksum(),
+            dex_file2_data->GetHeader().checksum_);
+
   ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
   ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
                       &opened_dex_file1->GetHeader(),
@@ -651,6 +656,13 @@
                       &opened_dex_file2->GetHeader(),
                       dex_file2_data->GetHeader().file_size_));
   ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+
+  const VdexFile::Header &vdex_header = opened_oat_file->GetVdexFile()->GetHeader();
+  ASSERT_EQ(vdex_header.GetQuickeningInfoSize(), 0u);
+
+  int64_t actual_vdex_size = vdex_file.GetFile()->GetLength();
+  ASSERT_GE(actual_vdex_size, 0);
+  ASSERT_EQ((uint64_t) actual_vdex_size, vdex_header.GetComputedFileSize());
 }
 
 TEST_F(OatTest, DexFileInputCheckOutput) {
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index a7af193..527a5b9 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -1203,10 +1203,16 @@
   bool is_static = (flags & kAccStatic) != 0;
   fprintf(gOutFile, "      positions     : \n");
   uint32_t debug_info_offset = pDexFile->GetDebugInfoOffset(pCode);
-  pDexFile->DecodeDebugPositionInfo(pCode, debug_info_offset, dumpPositionsCb, nullptr);
+  pDexFile->DecodeDebugPositionInfo(debug_info_offset, dumpPositionsCb, nullptr);
   fprintf(gOutFile, "      locals        : \n");
-  pDexFile->DecodeDebugLocalInfo(
-      pCode, debug_info_offset, is_static, idx, dumpLocalsCb, nullptr);
+  pDexFile->DecodeDebugLocalInfo(pCode->registers_size_,
+                                 pCode->ins_size_,
+                                 pCode->insns_size_in_code_units_,
+                                 debug_info_offset,
+                                 is_static,
+                                 idx,
+                                 dumpLocalsCb,
+                                 nullptr);
 }
 
 /*
diff --git a/dexdump/dexdump_cfg.cc b/dexdump/dexdump_cfg.cc
index 62c970d..23ecf93 100644
--- a/dexdump/dexdump_cfg.cc
+++ b/dexdump/dexdump_cfg.cc
@@ -23,6 +23,7 @@
 #include <map>
 #include <ostream>
 #include <set>
+#include <sstream>
 
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 43c3d12..382b551 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -28,7 +28,9 @@
 #include <string.h>
 #include <unistd.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/logging.h"  // For InitLogging.
 #include "mem_map.h"
 #include "runtime.h"
 
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 2af579c..90df2d7 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -674,20 +674,20 @@
   // Add "fixup" references to types, strings, methods, and fields.
   // This is temporary, as we will probably want more detailed parsing of the
   // instructions here.
-  std::unique_ptr<std::vector<TypeId*>> type_ids(new std::vector<TypeId*>());
-  std::unique_ptr<std::vector<StringId*>> string_ids(new std::vector<StringId*>());
-  std::unique_ptr<std::vector<MethodId*>> method_ids(new std::vector<MethodId*>());
-  std::unique_ptr<std::vector<FieldId*>> field_ids(new std::vector<FieldId*>());
+  std::vector<TypeId*> type_ids;
+  std::vector<StringId*> string_ids;
+  std::vector<MethodId*> method_ids;
+  std::vector<FieldId*> field_ids;
   if (GetIdsFromByteCode(*this,
                          code_item,
-                         type_ids.get(),
-                         string_ids.get(),
-                         method_ids.get(),
-                         field_ids.get())) {
-    CodeFixups* fixups = new CodeFixups(type_ids.release(),
-                                        string_ids.release(),
-                                        method_ids.release(),
-                                        field_ids.release());
+                         /*out*/ &type_ids,
+                         /*out*/ &string_ids,
+                         /*out*/ &method_ids,
+                         /*out*/ &field_ids)) {
+    CodeFixups* fixups = new CodeFixups(std::move(type_ids),
+                                        std::move(string_ids),
+                                        std::move(method_ids),
+                                        std::move(field_ids));
     code_item->SetCodeFixups(fixups);
   }
 
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 8421774..b25e164 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -1013,25 +1013,25 @@
 
 class CodeFixups {
  public:
-  CodeFixups(std::vector<TypeId*>* type_ids,
-             std::vector<StringId*>* string_ids,
-             std::vector<MethodId*>* method_ids,
-             std::vector<FieldId*>* field_ids)
-      : type_ids_(type_ids),
-        string_ids_(string_ids),
-        method_ids_(method_ids),
-        field_ids_(field_ids) { }
+  CodeFixups(std::vector<TypeId*> type_ids,
+             std::vector<StringId*> string_ids,
+             std::vector<MethodId*> method_ids,
+             std::vector<FieldId*> field_ids)
+      : type_ids_(std::move(type_ids)),
+        string_ids_(std::move(string_ids)),
+        method_ids_(std::move(method_ids)),
+        field_ids_(std::move(field_ids)) { }
 
-  std::vector<TypeId*>* TypeIds() const { return type_ids_.get(); }
-  std::vector<StringId*>* StringIds() const { return string_ids_.get(); }
-  std::vector<MethodId*>* MethodIds() const { return method_ids_.get(); }
-  std::vector<FieldId*>* FieldIds() const { return field_ids_.get(); }
+  const std::vector<TypeId*>& TypeIds() const { return type_ids_; }
+  const std::vector<StringId*>& StringIds() const { return string_ids_; }
+  const std::vector<MethodId*>& MethodIds() const { return method_ids_; }
+  const std::vector<FieldId*>& FieldIds() const { return field_ids_; }
 
  private:
-  std::unique_ptr<std::vector<TypeId*>> type_ids_;
-  std::unique_ptr<std::vector<StringId*>> string_ids_;
-  std::unique_ptr<std::vector<MethodId*>> method_ids_;
-  std::unique_ptr<std::vector<FieldId*>> field_ids_;
+  std::vector<TypeId*> type_ids_;
+  std::vector<StringId*> string_ids_;
+  std::vector<MethodId*> method_ids_;
+  std::vector<FieldId*> field_ids_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeFixups);
 };
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index 4b46341..e4ed69b 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -188,20 +188,16 @@
       DumpAddressRange(code_item, class_index);
       const dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
       if (fixups != nullptr) {
-        std::vector<dex_ir::TypeId*>* type_ids = fixups->TypeIds();
-        for (dex_ir::TypeId* type_id : *type_ids) {
+        for (dex_ir::TypeId* type_id : fixups->TypeIds()) {
           DumpTypeId(type_id, class_index);
         }
-        std::vector<dex_ir::StringId*>* string_ids = fixups->StringIds();
-        for (dex_ir::StringId* string_id : *string_ids) {
+        for (dex_ir::StringId* string_id : fixups->StringIds()) {
           DumpStringId(string_id, class_index);
         }
-        std::vector<dex_ir::MethodId*>* method_ids = fixups->MethodIds();
-        for (dex_ir::MethodId* method_id : *method_ids) {
+        for (dex_ir::MethodId* method_id : fixups->MethodIds()) {
           DumpMethodId(method_id, class_index);
         }
-        std::vector<dex_ir::FieldId*>* field_ids = fixups->FieldIds();
-        for (dex_ir::FieldId* field_id : *field_ids) {
+        for (dex_ir::FieldId* field_id : fixups->FieldIds()) {
           DumpFieldId(field_id, class_index);
         }
       }
diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc
index e83f98e..b250701 100644
--- a/dexlayout/dexdiag.cc
+++ b/dexlayout/dexdiag.cc
@@ -26,6 +26,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/logging.h"  // For InitLogging.
 #include "base/stringpiece.h"
 
 #include "dex_file.h"
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index d904a52..33155b6 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -33,6 +33,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "dex_file-inl.h"
 #include "dex_file_layout.h"
 #include "dex_file_loader.h"
@@ -1656,11 +1657,11 @@
           continue;
         }
         // Add const-strings.
-        for (dex_ir::StringId* id : *fixups->StringIds()) {
+        for (dex_ir::StringId* id : fixups->StringIds()) {
           from_hot_method[id->GetIndex()] = true;
         }
         // Add field classes, names, and types.
-        for (dex_ir::FieldId* id : *fixups->FieldIds()) {
+        for (dex_ir::FieldId* id : fixups->FieldIds()) {
           // TODO: Only visit field ids from static getters and setters.
           from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
           from_hot_method[id->Name()->GetIndex()] = true;
@@ -1668,7 +1669,7 @@
         }
         // For clinits, add referenced method classes, names, and protos.
         if (is_clinit) {
-          for (dex_ir::MethodId* id : *fixups->MethodIds()) {
+          for (dex_ir::MethodId* id : fixups->MethodIds()) {
             from_hot_method[id->Class()->GetStringId()->GetIndex()] = true;
             from_hot_method[id->Name()->GetIndex()] = true;
             is_shorty[id->Proto()->Shorty()->GetIndex()] = true;
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 17097f1..5bb7196 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -29,7 +29,9 @@
 #include <sys/types.h>
 #include <unistd.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/logging.h"  // For InitLogging.
 #include "jit/profile_compilation_info.h"
 #include "mem_map.h"
 #include "runtime.h"
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index 3bd903d..4c13ed6 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+#include "base/logging.h"  // For InitLogging.
 #include "dex_file-inl.h"
 #include "dex_file_loader.h"
 #include "mem_map.h"
@@ -121,7 +122,7 @@
   // Find the first line.
   int firstLine = -1;
   uint32_t debug_info_offset = pDexFile->GetDebugInfoOffset(pCode);
-  pDexFile->DecodeDebugPositionInfo(pCode, debug_info_offset, positionsCb, &firstLine);
+  pDexFile->DecodeDebugPositionInfo(debug_info_offset, positionsCb, &firstLine);
 
   // Method signature.
   const Signature signature = pDexFile->GetMethodSignature(pMethodId);
diff --git a/dexoptanalyzer/dexoptanalyzer.cc b/dexoptanalyzer/dexoptanalyzer.cc
index 39c9b99..eead2dc 100644
--- a/dexoptanalyzer/dexoptanalyzer.cc
+++ b/dexoptanalyzer/dexoptanalyzer.cc
@@ -16,9 +16,10 @@
 
 #include <string>
 
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 #include "base/file_utils.h"
+#include "base/logging.h"  // For InitLogging.
 #include "compiler_filter.h"
 #include "class_loader_context.h"
 #include "dex_file.h"
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 05fce96..8aa638a 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -1150,10 +1150,10 @@
 
     bool found_boot_map = false;
     // Find the memory map only for boot.art
-    for (const backtrace_map_t& map : *tmp_proc_maps) {
-      if (EndsWith(map.name, GetImageLocationBaseName())) {
-        if ((map.flags & PROT_WRITE) != 0) {
-          boot_map_ = map;
+    for (const backtrace_map_t* map : *tmp_proc_maps) {
+      if (EndsWith(map->name, GetImageLocationBaseName())) {
+        if ((map->flags & PROT_WRITE) != 0) {
+          boot_map_ = *map;
           found_boot_map = true;
           break;
         }
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index 906404b..667c37c 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -41,7 +41,7 @@
 
 .PHONY: dump-oat-core-target-$(TARGET_ARCH)
 ifeq ($(ART_BUILD_TARGET),true)
-dump-oat-core-target-$(TARGET_ARCH): $(TARGET_CORE_IMAGE_default_$(ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP)
+dump-oat-core-target-$(TARGET_ARCH): $(TARGET_CORE_IMAGE_DEFAULT_$(ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP)
 	$(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) \
 	  --output=$(ART_DUMP_OAT_PATH)/core.target.$(TARGET_ARCH).oatdump.txt --instruction-set=$(TARGET_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.$(TARGET_ARCH).oatdump.txt
@@ -50,7 +50,7 @@
 ifdef TARGET_2ND_ARCH
 .PHONY: dump-oat-core-target-$(TARGET_2ND_ARCH)
 ifeq ($(ART_BUILD_TARGET),true)
-dump-oat-core-target-$(TARGET_2ND_ARCH): $(TARGET_CORE_IMAGE_default_$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP)
+dump-oat-core-target-$(TARGET_2ND_ARCH): $(TARGET_CORE_IMAGE_DEFAULT_$(2ND_ART_PHONY_TEST_TARGET_SUFFIX)) $(OATDUMP)
 	$(OATDUMP) --image=$(TARGET_CORE_IMG_LOCATION) \
 	  --output=$(ART_DUMP_OAT_PATH)/core.target.$(TARGET_2ND_ARCH).oatdump.txt --instruction-set=$(TARGET_2ND_ARCH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/core.target.$(TARGET_2ND_ARCH).oatdump.txt
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 2c150876..1a1d8cc 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -146,13 +146,10 @@
 
     auto* rodata = builder_->GetRoData();
     auto* text = builder_->GetText();
-    auto* bss = builder_->GetBss();
 
     const uint8_t* rodata_begin = oat_file_->Begin();
     const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-    if (no_bits_) {
-      rodata->WriteNoBitsSection(rodata_size);
-    } else {
+    if (!no_bits_) {
       rodata->Start();
       rodata->WriteFully(rodata_begin, rodata_size);
       rodata->End();
@@ -160,18 +157,12 @@
 
     const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
     const size_t text_size = oat_file_->End() - text_begin;
-    if (no_bits_) {
-      text->WriteNoBitsSection(text_size);
-    } else {
+    if (!no_bits_) {
       text->Start();
       text->WriteFully(text_begin, text_size);
       text->End();
     }
 
-    if (oat_file_->BssSize() != 0) {
-      bss->WriteNoBitsSection(oat_file_->BssSize());
-    }
-
     if (isa == InstructionSet::kMips || isa == InstructionSet::kMips64) {
       builder_->WriteMIPSabiflagsSection();
     }
@@ -2269,7 +2260,7 @@
           os << StringPrintf("null   %s\n", PrettyDescriptor(field->GetTypeDescriptor()).c_str());
         } else {
           // Grab the field type without causing resolution.
-          ObjPtr<mirror::Class> field_type = field->LookupType();
+          ObjPtr<mirror::Class> field_type = field->LookupResolvedType();
           if (field_type != nullptr) {
             PrettyObjectValue(os, field_type, value);
           } else {
diff --git a/openjdkjvm/OpenjdkJvm.cc b/openjdkjvm/OpenjdkJvm.cc
index 29ebefd..1b8233a 100644
--- a/openjdkjvm/OpenjdkJvm.cc
+++ b/openjdkjvm/OpenjdkJvm.cc
@@ -40,9 +40,10 @@
 #include <sys/time.h>
 #include <unistd.h>
 
+#include <android-base/logging.h>
+
 #include "../../libcore/ojluni/src/main/native/jvm.h"  // TODO(narayan): fix it
 
-#include "base/logging.h"
 #include "base/macros.h"
 #include "common_throws.h"
 #include "gc/heap.h"
diff --git a/openjdkjvmti/OpenjdkJvmTi.cc b/openjdkjvmti/OpenjdkJvmTi.cc
index 62f723d..aae8055 100644
--- a/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/openjdkjvmti/OpenjdkJvmTi.cc
@@ -33,12 +33,14 @@
 #include <type_traits>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include <jni.h>
 
 #include "jvmti.h"
 
 #include "art_jvmti.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For gLogVerbosity.
 #include "base/mutex.h"
 #include "events-inl.h"
 #include "jni_env_ext-inl.h"
@@ -1437,6 +1439,7 @@
       art::gLogVerbosity.third_party_jni = val;
       art::gLogVerbosity.threads = val;
       art::gLogVerbosity.verifier = val;
+      // Do not set verifier-debug.
       art::gLogVerbosity.image = val;
 
       // Note: can't switch systrace_lock_logging. That requires changing entrypoints.
diff --git a/openjdkjvmti/art_jvmti.h b/openjdkjvmti/art_jvmti.h
index e8e62c2..2a8c2e9 100644
--- a/openjdkjvmti/art_jvmti.h
+++ b/openjdkjvmti/art_jvmti.h
@@ -39,9 +39,10 @@
 
 #include <jni.h>
 
+#include <android-base/logging.h>
+
 #include "deopt_manager.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/strlcpy.h"
 #include "base/mutex.h"
diff --git a/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
index be4ebbc..330a3de 100644
--- a/openjdkjvmti/events.cc
+++ b/openjdkjvmti/events.cc
@@ -36,7 +36,6 @@
 #include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
 #include "deopt_manager.h"
 #include "dex_file_types.h"
 #include "gc/allocation_listener.h"
@@ -139,7 +138,9 @@
 }
 
 
-void EventMasks::EnableEvent(art::Thread* thread, ArtJvmtiEvent event) {
+void EventMasks::EnableEvent(ArtJvmTiEnv* env, art::Thread* thread, ArtJvmtiEvent event) {
+  DCHECK_EQ(&env->event_masks, this);
+  env->event_info_mutex_.AssertExclusiveHeld(art::Thread::Current());
   DCHECK(EventMask::EventIsInRange(event));
   GetEventMask(thread).Set(event);
   if (thread != nullptr) {
@@ -147,7 +148,9 @@
   }
 }
 
-void EventMasks::DisableEvent(art::Thread* thread, ArtJvmtiEvent event) {
+void EventMasks::DisableEvent(ArtJvmTiEnv* env, art::Thread* thread, ArtJvmtiEvent event) {
+  DCHECK_EQ(&env->event_masks, this);
+  env->event_info_mutex_.AssertExclusiveHeld(art::Thread::Current());
   DCHECK(EventMask::EventIsInRange(event));
   GetEventMask(thread).Set(event, false);
   if (thread != nullptr) {
@@ -899,9 +902,9 @@
   }
 }
 
-static void SetupTraceListener(JvmtiMethodTraceListener* listener,
-                               ArtJvmtiEvent event,
-                               bool enable) {
+void EventHandler::SetupTraceListener(JvmtiMethodTraceListener* listener,
+                                      ArtJvmtiEvent event,
+                                      bool enable) {
   bool needs_full_deopt = EventNeedsFullDeopt(event);
   // Make sure we can deopt.
   {
@@ -921,8 +924,21 @@
   }
 
   // Add the actual listeners.
-  art::ScopedThreadStateChange stsc(art::Thread::Current(), art::ThreadState::kNative);
   uint32_t new_events = GetInstrumentationEventsFor(event);
+  if (new_events == art::instrumentation::Instrumentation::kDexPcMoved) {
+    // Need to skip adding the listeners if the event is breakpoint/single-step since those events
+    // share the same underlying art-instrumentation event. Each of them still needs its own deopt
+    // request though, which is why this check is deferred until here.
+    DCHECK(event == ArtJvmtiEvent::kBreakpoint || event == ArtJvmtiEvent::kSingleStep);
+    ArtJvmtiEvent other = event == ArtJvmtiEvent::kBreakpoint ? ArtJvmtiEvent::kSingleStep
+                                                              : ArtJvmtiEvent::kBreakpoint;
+    if (IsEventEnabledAnywhere(other)) {
+      // The listener must stay installed (disable) or is already installed (enable) because the
+      // other jvmti event uses the same instrumentation event.
+      return;
+    }
+  }
+  art::ScopedThreadStateChange stsc(art::Thread::Current(), art::ThreadState::kNative);
   art::instrumentation::Instrumentation* instr = art::Runtime::Current()->GetInstrumentation();
   art::gc::ScopedGCCriticalSection gcs(art::Thread::Current(),
                                        art::gc::kGcCauseInstrumentation,
@@ -1002,18 +1018,6 @@
     case ArtJvmtiEvent::kGarbageCollectionFinish:
       SetupGcPauseTracking(gc_pause_listener_.get(), event, enable);
       return;
-
-    case ArtJvmtiEvent::kBreakpoint:
-    case ArtJvmtiEvent::kSingleStep: {
-      ArtJvmtiEvent other = (event == ArtJvmtiEvent::kBreakpoint) ? ArtJvmtiEvent::kSingleStep
-                                                                  : ArtJvmtiEvent::kBreakpoint;
-      // We only need to do anything if there isn't already a listener installed/held-on by the
-      // other jvmti event that uses DexPcMoved.
-      if (!IsEventEnabledAnywhere(other)) {
-        SetupTraceListener(method_trace_listener_.get(), event, enable);
-      }
-      return;
-    }
     // FramePop can never be disabled once it's been turned on since we would either need to deal
     // with dangling pointers or have missed events.
     // TODO We really need to make this not the case anymore.
@@ -1030,6 +1034,8 @@
     case ArtJvmtiEvent::kFieldModification:
     case ArtJvmtiEvent::kException:
     case ArtJvmtiEvent::kExceptionCatch:
+    case ArtJvmtiEvent::kBreakpoint:
+    case ArtJvmtiEvent::kSingleStep:
       SetupTraceListener(method_trace_listener_.get(), event, enable);
       return;
     case ArtJvmtiEvent::kMonitorContendedEnter:
@@ -1131,20 +1137,28 @@
     return ERR(MUST_POSSESS_CAPABILITY);
   }
 
-  bool old_state = global_mask.Test(event);
+  bool old_state;
+  bool new_state;
 
-  if (mode == JVMTI_ENABLE) {
-    env->event_masks.EnableEvent(thread, event);
-    global_mask.Set(event);
-  } else {
-    DCHECK_EQ(mode, JVMTI_DISABLE);
+  {
+    // Change the event masks atomically.
+    art::Thread* self = art::Thread::Current();
+    art::MutexLock mu(self, envs_lock_);
+    art::WriterMutexLock mu_env_info(self, env->event_info_mutex_);
+    old_state = global_mask.Test(event);
+    if (mode == JVMTI_ENABLE) {
+      env->event_masks.EnableEvent(env, thread, event);
+      global_mask.Set(event);
+      new_state = true;
+    } else {
+      DCHECK_EQ(mode, JVMTI_DISABLE);
 
-    env->event_masks.DisableEvent(thread, event);
-    RecalculateGlobalEventMask(event);
+      env->event_masks.DisableEvent(env, thread, event);
+      RecalculateGlobalEventMaskLocked(event);
+      new_state = global_mask.Test(event);
+    }
   }
 
-  bool new_state = global_mask.Test(event);
-
   // Handle any special work required for the event type.
   if (new_state != old_state) {
     HandleEventType(event, mode == JVMTI_ENABLE);
diff --git a/openjdkjvmti/events.h b/openjdkjvmti/events.h
index c73215f..81edb93 100644
--- a/openjdkjvmti/events.h
+++ b/openjdkjvmti/events.h
@@ -20,7 +20,10 @@
 #include <bitset>
 #include <vector>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "jvmti.h"
 #include "thread.h"
 
@@ -149,8 +152,16 @@
 
   EventMask& GetEventMask(art::Thread* thread);
   EventMask* GetEventMaskOrNull(art::Thread* thread);
-  void EnableEvent(art::Thread* thread, ArtJvmtiEvent event);
-  void DisableEvent(art::Thread* thread, ArtJvmtiEvent event);
+  // Circular dependencies mean we cannot see the definition of ArtJvmTiEnv so the mutex is simply
+  // asserted in the function.
+  // Note that the 'env' passed in must be the same env this EventMasks is associated with.
+  void EnableEvent(ArtJvmTiEnv* env, art::Thread* thread, ArtJvmtiEvent event);
+      // REQUIRES(env->event_info_mutex_);
+  // Circular dependencies mean we cannot see the definition of ArtJvmTiEnv so the mutex is simply
+  // asserted in the function.
+  // Note that the 'env' passed in must be the same env this EventMasks is associated with.
+  void DisableEvent(ArtJvmTiEnv* env, art::Thread* thread, ArtJvmtiEvent event);
+      // REQUIRES(env->event_info_mutex_);
   bool IsEnabledAnywhere(ArtJvmtiEvent event);
   // Make any changes to event masks needed for the given capability changes. If caps_added is true
   // then caps is all the newly set capabilities of the jvmtiEnv. If it is false then caps is the
@@ -234,6 +245,8 @@
       REQUIRES(!envs_lock_);
 
  private:
+  void SetupTraceListener(JvmtiMethodTraceListener* listener, ArtJvmtiEvent event, bool enable);
+
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline std::vector<impl::EventHandlerFunc<kEvent>> CollectEvents(art::Thread* thread,
diff --git a/openjdkjvmti/jvmti_allocator.h b/openjdkjvmti/jvmti_allocator.h
index 11af7b6..bd4c85b 100644
--- a/openjdkjvmti/jvmti_allocator.h
+++ b/openjdkjvmti/jvmti_allocator.h
@@ -32,8 +32,9 @@
 #ifndef ART_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
 #define ART_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+
 #include "jvmti.h"
 
 #include "ti_allocator.h"
diff --git a/openjdkjvmti/jvmti_weak_table-inl.h b/openjdkjvmti/jvmti_weak_table-inl.h
index 5d20946..6990042 100644
--- a/openjdkjvmti/jvmti_weak_table-inl.h
+++ b/openjdkjvmti/jvmti_weak_table-inl.h
@@ -36,8 +36,9 @@
 
 #include <limits>
 
+#include <android-base/logging.h>
+
 #include "art_jvmti.h"
-#include "base/logging.h"
 #include "gc/allocation_listener.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
diff --git a/openjdkjvmti/ti_class_loader.cc b/openjdkjvmti/ti_class_loader.cc
index b551b55..701ba80 100644
--- a/openjdkjvmti/ti_class_loader.cc
+++ b/openjdkjvmti/ti_class_loader.cc
@@ -33,11 +33,11 @@
 
 #include <limits>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_jvmti.h"
-#include "base/logging.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "events-inl.h"
diff --git a/openjdkjvmti/ti_ddms.cc b/openjdkjvmti/ti_ddms.cc
index 500a453..0b4906d 100644
--- a/openjdkjvmti/ti_ddms.cc
+++ b/openjdkjvmti/ti_ddms.cc
@@ -49,14 +49,16 @@
                                  /*out*/jint* type_out,
                                  /*out*/jint* data_length_out,
                                  /*out*/jbyte** data_out) {
-  constexpr uint32_t kDdmHeaderSize = sizeof(uint32_t) * 2;
-  if (env == nullptr || data_in == nullptr || data_out == nullptr || data_length_out == nullptr) {
+  if (env == nullptr || type_out == nullptr || data_out == nullptr || data_length_out == nullptr) {
     return ERR(NULL_POINTER);
-  } else if (length_in < static_cast<jint>(kDdmHeaderSize)) {
-    // need to get type and length at least.
+  } else if (data_in == nullptr && length_in != 0) {
+    // Data-in shouldn't be null if we have data.
     return ERR(ILLEGAL_ARGUMENT);
   }
 
+  *data_length_out = 0;
+  *data_out = nullptr;
+
   art::Thread* self = art::Thread::Current();
   art::ScopedThreadStateChange(self, art::ThreadState::kNative);
 
@@ -71,13 +73,15 @@
     return ERR(INTERNAL);
   } else {
     jvmtiError error = OK;
-    JvmtiUniquePtr<jbyte[]> ret = AllocJvmtiUniquePtr<jbyte[]>(env, out_data.size(), &error);
-    if (error != OK) {
-      return error;
+    if (!out_data.empty()) {
+      JvmtiUniquePtr<jbyte[]> ret = AllocJvmtiUniquePtr<jbyte[]>(env, out_data.size(), &error);
+      if (error != OK) {
+        return error;
+      }
+      memcpy(ret.get(), out_data.data(), out_data.size());
+      *data_out = ret.release();
+      *data_length_out = static_cast<jint>(out_data.size());
     }
-    memcpy(ret.get(), out_data.data(), out_data.size());
-    *data_out = ret.release();
-    *data_length_out = static_cast<jint>(out_data.size());
     return OK;
   }
 }
diff --git a/openjdkjvmti/ti_extension.cc b/openjdkjvmti/ti_extension.cc
index afd0723..79a8cd6 100644
--- a/openjdkjvmti/ti_extension.cc
+++ b/openjdkjvmti/ti_extension.cc
@@ -216,7 +216,7 @@
       {
         { "type_in", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false },
         { "length_in", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false },
-        { "data_in", JVMTI_KIND_IN_BUF, JVMTI_TYPE_JBYTE, false },
+        { "data_in", JVMTI_KIND_IN_BUF, JVMTI_TYPE_JBYTE, true },
         { "type_out", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false },
         { "data_len_out", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false },
         { "data_out", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_JBYTE, false }
diff --git a/openjdkjvmti/ti_method.cc b/openjdkjvmti/ti_method.cc
index 448ce41..4444853 100644
--- a/openjdkjvmti/ti_method.cc
+++ b/openjdkjvmti/ti_method.cc
@@ -37,6 +37,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/mutex-inl.h"
+#include "code_item_accessors-inl.h"
 #include "dex_file_annotations.h"
 #include "dex_file_types.h"
 #include "events-inl.h"
@@ -190,12 +191,17 @@
   }
 
   art::ScopedObjectAccess soa(art::Thread::Current());
-  const art::DexFile* dex_file = art_method->GetDexFile();
-  const art::DexFile::CodeItem* code_item = art_method->GetCodeItem();
-  // TODO code_item == nullptr means that the method is abstract (or native, but we check that
+
+  const art::DexFile* const dex_file = art_method->GetDexFile();
+  if (dex_file == nullptr) {
+    return ERR(ABSENT_INFORMATION);
+  }
+
+  // TODO HasCodeItem == false means that the method is abstract (or native, but we check that
   // earlier). We should check what is returned by the RI in this situation since it's not clear
   // what the appropriate return value is from the spec.
-  if (dex_file == nullptr || code_item == nullptr) {
+  art::CodeItemDebugInfoAccessor accessor(art_method);
+  if (!accessor.HasCodeItem()) {
     return ERR(ABSENT_INFORMATION);
   }
 
@@ -260,9 +266,10 @@
   };
 
   LocalVariableContext context(env);
-  uint32_t debug_info_offset = art::OatFile::GetDebugInfoOffset(*dex_file, code_item);
-  if (!dex_file->DecodeDebugLocalInfo(code_item,
-                                      debug_info_offset,
+  if (!dex_file->DecodeDebugLocalInfo(accessor.RegistersSize(),
+                                      accessor.InsSize(),
+                                      accessor.InsnsSizeInCodeUnits(),
+                                      accessor.DebugInfoOffset(),
                                       art_method->IsStatic(),
                                       art_method->GetDexMethodIndex(),
                                       LocalVariableContext::Callback,
@@ -462,7 +469,7 @@
   art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
   DCHECK(!art_method->IsRuntimeMethod());
 
-  const art::DexFile::CodeItem* code_item;
+  art::CodeItemDebugInfoAccessor accessor;
   const art::DexFile* dex_file;
   {
     art::ScopedObjectAccess soa(art::Thread::Current());
@@ -477,15 +484,14 @@
       return ERR(NULL_POINTER);
     }
 
-    code_item = art_method->GetCodeItem();
+    accessor = art::CodeItemDebugInfoAccessor(art_method);
     dex_file = art_method->GetDexFile();
-    DCHECK(code_item != nullptr) << art_method->PrettyMethod() << " " << dex_file->GetLocation();
+    DCHECK(accessor.HasCodeItem()) << art_method->PrettyMethod() << " " << dex_file->GetLocation();
   }
 
   LineNumberContext context;
-  uint32_t debug_info_offset = art::OatFile::GetDebugInfoOffset(*dex_file, code_item);
   bool success = dex_file->DecodeDebugPositionInfo(
-      code_item, debug_info_offset, CollectLineNumbers, &context);
+      accessor.DebugInfoOffset(), CollectLineNumbers, &context);
   if (!success) {
     return ERR(ABSENT_INFORMATION);
   }
@@ -613,8 +619,11 @@
                          /*out*/art::Primitive::Type* type)
       REQUIRES(art::Locks::mutator_lock_) {
     const art::DexFile* dex_file = method->GetDexFile();
-    const art::DexFile::CodeItem* code_item = method->GetCodeItem();
-    if (dex_file == nullptr || code_item == nullptr) {
+    if (dex_file == nullptr) {
+      return ERR(OPAQUE_FRAME);
+    }
+    art::CodeItemDebugInfoAccessor accessor(method);
+    if (!accessor.HasCodeItem()) {
       return ERR(OPAQUE_FRAME);
     }
 
@@ -653,9 +662,10 @@
     };
 
     GetLocalVariableInfoContext context(slot_, dex_pc, descriptor, type);
-    uint32_t debug_info_offset = art::OatFile::GetDebugInfoOffset(*dex_file, code_item);
-    if (!dex_file->DecodeDebugLocalInfo(code_item,
-                                        debug_info_offset,
+    if (!dex_file->DecodeDebugLocalInfo(accessor.RegistersSize(),
+                                        accessor.InsSize(),
+                                        accessor.InsnsSizeInCodeUnits(),
+                                        accessor.DebugInfoOffset(),
                                         method->IsStatic(),
                                         method->GetDexMethodIndex(),
                                         GetLocalVariableInfoContext::Callback,
diff --git a/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc
index 5b125f6..c18b354 100644
--- a/openjdkjvmti/ti_redefine.cc
+++ b/openjdkjvmti/ti_redefine.cc
@@ -33,13 +33,13 @@
 
 #include <limits>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_jvmti.h"
 #include "art_method-inl.h"
 #include "base/array_ref.h"
-#include "base/logging.h"
 #include "base/stringpiece.h"
 #include "class_linker-inl.h"
 #include "debugger.h"
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index b7b81ce..555c5a7 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -31,10 +31,11 @@
 
 #include "ti_thread.h"
 
-#include "android-base/strings.h"
+#include <android-base/logging.h>
+#include <android-base/strings.h>
+
 #include "art_field-inl.h"
 #include "art_jvmti.h"
-#include "base/logging.h"
 #include "base/mutex.h"
 #include "events-inl.h"
 #include "gc/system_weak.h"
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index ae82d72..eb648cb 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -30,6 +30,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/dumpable.h"
+#include "base/logging.h"  // For InitLogging.
 #include "base/memory_tool.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
diff --git a/profman/profman.cc b/profman/profman.cc
index a5a5546..0bef205 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -32,6 +32,7 @@
 #include "android-base/strings.h"
 
 #include "base/dumpable.h"
+#include "base/logging.h"  // For InitLogging.
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
diff --git a/runtime/Android.bp b/runtime/Android.bp
index a136ccb..6477347 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -39,6 +39,7 @@
         "base/hex_dump.cc",
         "base/logging.cc",
         "base/mutex.cc",
+        "base/runtime_debug.cc",
         "base/safe_copy.cc",
         "base/scoped_arena_allocator.cc",
         "base/scoped_flock.cc",
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index fa9aa46..b980296 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_
 #define ART_RUNTIME_ARCH_ARM_CONTEXT_ARM_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_arm.h"
 
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index ef2b342..315bf95 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -21,7 +21,7 @@
 #include "art_method.h"
 #include "base/enums.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "globals.h"
 #include "thread-current-inl.h"
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index b789fc7..801254f 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -25,10 +25,9 @@
 
 #include <fstream>
 
-#include "android-base/stringprintf.h"
-#include "android-base/strings.h"
-
-#include "base/logging.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
 
 #if defined(__arm__)
 extern "C" bool artCheckForArmSdivInstruction();
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 6ff8dd6..6ec9c48 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1783,7 +1783,9 @@
     .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
 
 .Lexception_in_native:
-    ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
+    ldr ip, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
+    add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
+    mov sp, ip
     .cfi_def_cfa_register sp
     # This will create a new save-all frame, required by the runtime.
     DELIVER_PENDING_EXCEPTION
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index ff4f81b..18585c7 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -16,9 +16,10 @@
 
 #include "thread.h"
 
+#include <android-base/logging.h>
+
 #include "asm_support_arm.h"
 #include "base/enums.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 36aded0..e64cfb8 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_ARM64_CONTEXT_ARM64_H_
 #define ART_RUNTIME_ARCH_ARM64_CONTEXT_ARM64_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_arm64.h"
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index d535c7e..d282c8c 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -21,7 +21,7 @@
 #include "art_method.h"
 #include "base/enums.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "globals.h"
 #include "registers_arm64.h"
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index d830ccf..9e9cb16 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -19,10 +19,10 @@
 #include <fstream>
 #include <sstream>
 
-#include "android-base/stringprintf.h"
-#include "android-base/strings.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
 
-#include "base/logging.h"
 #include "base/stl_util.h"
 
 namespace art {
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 280e593..47efeb9 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2299,7 +2299,7 @@
 .Lexception_in_native:
     // Move to x1 then sp to please assembler.
     ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
-    mov sp, x1
+    add sp, x1, #-1  // Remove the GenericJNI tag.
     .cfi_def_cfa_register sp
     # This will create a new save-all frame, required by the runtime.
     DELIVER_PENDING_EXCEPTION
diff --git a/runtime/arch/arm64/thread_arm64.cc b/runtime/arch/arm64/thread_arm64.cc
index 3483b70..19c4a6a 100644
--- a/runtime/arch/arm64/thread_arm64.cc
+++ b/runtime/arch/arm64/thread_arm64.cc
@@ -16,9 +16,10 @@
 
 #include "thread.h"
 
+#include <android-base/logging.h>
+
 #include "asm_support_arm64.h"
 #include "base/enums.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/arch/code_offset.h b/runtime/arch/code_offset.h
index ab04b1e..8e8dde4 100644
--- a/runtime/arch/code_offset.h
+++ b/runtime/arch/code_offset.h
@@ -19,8 +19,10 @@
 
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "instruction_set.h"
 
 namespace art {
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
index 67e2f35..1e3275c 100644
--- a/runtime/arch/instruction_set_features_test.cc
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -19,12 +19,11 @@
 #include <gtest/gtest.h>
 
 #ifdef ART_TARGET_ANDROID
-#include "android-base/properties.h"
+#include <android-base/properties.h>
 #endif
 
-#include "android-base/stringprintf.h"
-
-#include "base/logging.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 namespace art {
 
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 7dcff63..7e073b2 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_MIPS_CONTEXT_MIPS_H_
 #define ART_RUNTIME_ARCH_MIPS_CONTEXT_MIPS_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_mips.h"
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index dca3382..209f367 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -18,6 +18,7 @@
 
 #include "arch/mips/asm_support_mips.h"
 #include "atomic.h"
+#include "base/logging.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/math_entrypoints.h"
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 6dce54e..f82dc08 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -20,7 +20,7 @@
 #include "art_method.h"
 #include "base/callee_save_type.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "globals.h"
 #include "quick_method_frame_info_mips.h"
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 6d4145b..952ed25 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -19,10 +19,9 @@
 #include <fstream>
 #include <sstream>
 
-#include "android-base/stringprintf.h"
-#include "android-base/strings.h"
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
 
-#include "base/logging.h"
 #include "base/stl_util.h"
 
 namespace art {
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index ee539ed..76bc639 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_MIPS_INSTRUCTION_SET_FEATURES_MIPS_H_
 #define ART_RUNTIME_ARCH_MIPS_INSTRUCTION_SET_FEATURES_MIPS_H_
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set_features.h"
-#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 489c52c..fc77a64 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -2283,7 +2283,8 @@
     nop
 
 2:
-    lw      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    lw      $t0, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    addiu   $sp, $t0, -1  // Remove the GenericJNI tag.
     move    $gp, $s3               # restore $gp from $s3
     # This will create a new save-all frame, required by the runtime.
     DELIVER_PENDING_EXCEPTION
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 57af150..f500b58 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -19,7 +19,8 @@
 
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index 0a9ab7a..0be7a7f 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -16,9 +16,10 @@
 
 #include "thread.h"
 
+#include <android-base/logging.h>
+
 #include "asm_support_mips.h"
 #include "base/enums.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/arch/mips64/context_mips64.h b/runtime/arch/mips64/context_mips64.h
index 89fbf8f..b2a6138 100644
--- a/runtime/arch/mips64/context_mips64.h
+++ b/runtime/arch/mips64/context_mips64.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_MIPS64_CONTEXT_MIPS64_H_
 #define ART_RUNTIME_ARCH_MIPS64_CONTEXT_MIPS64_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_mips64.h"
 
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index bdce520..ba6fff0 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -21,7 +21,7 @@
 #include "art_method.h"
 #include "base/callee_save_type.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "globals.h"
 #include "quick_method_frame_info_mips64.h"
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 98ffe65..3fb83d9 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -2158,7 +2158,8 @@
     dmtc1   $v0, $f0               # place return value to FP return value
 
 1:
-    ld      $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    ld      $t0, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
+    daddiu  $sp, $t0, -1  // Remove the GenericJNI tag.
     # This will create a new save-all frame, required by the runtime.
     DELIVER_PENDING_EXCEPTION
 END art_quick_generic_jni_trampoline
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 30de2cc..bca260a 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -19,7 +19,8 @@
 
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/arch/mips64/thread_mips64.cc b/runtime/arch/mips64/thread_mips64.cc
index 3ce5e50..c1c390b 100644
--- a/runtime/arch/mips64/thread_mips64.cc
+++ b/runtime/arch/mips64/thread_mips64.cc
@@ -16,9 +16,10 @@
 
 #include "thread.h"
 
+#include <android-base/logging.h>
+
 #include "asm_support_mips64.h"
 #include "base/enums.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index 303dfe3..0ebb22b 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_X86_CONTEXT_X86_H_
 #define ART_RUNTIME_ARCH_X86_CONTEXT_X86_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_x86.h"
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 527332f..e6a9124 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -21,7 +21,7 @@
 #include "art_method.h"
 #include "base/enums.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "base/safe_copy.h"
 #include "globals.h"
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ea5a90d..9846251 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -19,11 +19,11 @@
 #include <fstream>
 #include <sstream>
 
-#include "android-base/stringprintf.h"
-#include "android-base/strings.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
 
 #include "arch/x86_64/instruction_set_features_x86_64.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 25716dc..a46ceeb 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1969,7 +1969,9 @@
     punpckldq %xmm1, %xmm0
     ret
 .Lexception_in_native:
-    movl %fs:THREAD_TOP_QUICK_FRAME_OFFSET, %esp
+    pushl %fs:THREAD_TOP_QUICK_FRAME_OFFSET
+    addl LITERAL(-1), (%esp)  // Remove the GenericJNI tag.
+    movl (%esp), %esp
     // Do a call to push a new save-all frame required by the runtime.
     call .Lexception_call
 .Lexception_call:
diff --git a/runtime/arch/x86/registers_x86.h b/runtime/arch/x86/registers_x86.h
index 23027ed..ded3520 100644
--- a/runtime/arch/x86/registers_x86.h
+++ b/runtime/arch/x86/registers_x86.h
@@ -19,7 +19,8 @@
 
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
index f8e2845..d242693 100644
--- a/runtime/arch/x86_64/context_x86_64.h
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
 #define ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
 
+#include <android-base/logging.h>
+
 #include "arch/context.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "registers_x86_64.h"
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 2c3da90..463e5a2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1958,7 +1958,9 @@
     movq %rax, %xmm0
     ret
 .Lexception_in_native:
-    movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
+    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
+    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
+    movq (%rsp), %rsp
     CFI_DEF_CFA_REGISTER(rsp)
     // Do a call to push a new save-all frame required by the runtime.
     call .Lexception_call
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
index dda1d5f..4f22431 100644
--- a/runtime/arch/x86_64/registers_x86_64.h
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -19,7 +19,8 @@
 
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 4a328e8..2b18577 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -19,7 +19,8 @@
 
 #include "art_field.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "class_linker.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -299,23 +300,17 @@
   return GetTypeAsPrimitiveType() != Primitive::kPrimNot;
 }
 
-inline ObjPtr<mirror::Class> ArtField::LookupType() {
+inline ObjPtr<mirror::Class> ArtField::LookupResolvedType() {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
   const uint32_t field_index = GetDexFieldIndex();
   ObjPtr<mirror::Class> declaring_class = GetDeclaringClass();
   if (UNLIKELY(declaring_class->IsProxyClass())) {
     return ProxyFindSystemClass(GetTypeDescriptor());
   }
-  ObjPtr<mirror::DexCache>  dex_cache = declaring_class->GetDexCache();
-  const DexFile* const dex_file = dex_cache->GetDexFile();
-  dex::TypeIndex type_idx = dex_file->GetFieldId(field_index).type_idx_;
-  ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
-  if (UNLIKELY(type == nullptr)) {
-    type = Runtime::Current()->GetClassLinker()->LookupResolvedType(
-        *dex_file, type_idx, dex_cache, declaring_class->GetClassLoader());
-    DCHECK(!Thread::Current()->IsExceptionPending());
-  }
-  return type.Ptr();
+  ObjPtr<mirror::Class> type = Runtime::Current()->GetClassLinker()->LookupResolvedType(
+      declaring_class->GetDexFile().GetFieldId(field_index).type_idx_, declaring_class);
+  DCHECK(!Thread::Current()->IsExceptionPending());
+  return type;
 }
 
 inline ObjPtr<mirror::Class> ArtField::ResolveType() {
@@ -324,15 +319,9 @@
   if (UNLIKELY(declaring_class->IsProxyClass())) {
     return ProxyFindSystemClass(GetTypeDescriptor());
   }
-  auto* dex_cache = declaring_class->GetDexCache();
-  const DexFile* const dex_file = dex_cache->GetDexFile();
-  dex::TypeIndex type_idx = dex_file->GetFieldId(field_index).type_idx_;
-  ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
-  if (UNLIKELY(type == nullptr)) {
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    type = class_linker->ResolveType(*dex_file, type_idx, declaring_class);
-    DCHECK_EQ(type == nullptr, Thread::Current()->IsExceptionPending());
-  }
+  ObjPtr<mirror::Class> type = Runtime::Current()->GetClassLinker()->ResolveType(
+      declaring_class->GetDexFile().GetFieldId(field_index).type_idx_, declaring_class);
+  DCHECK_EQ(type == nullptr, Thread::Current()->IsExceptionPending());
   return type;
 }
 
@@ -352,11 +341,10 @@
   auto dex_field_index = GetDexFieldIndex();
   CHECK_NE(dex_field_index, dex::kDexNoIndex);
   ObjPtr<mirror::DexCache> dex_cache = GetDexCache();
-  const auto* dex_file = dex_cache->GetDexFile();
-  const auto& field_id = dex_file->GetFieldId(dex_field_index);
+  const DexFile::FieldId& field_id = dex_cache->GetDexFile()->GetFieldId(dex_field_index);
   ObjPtr<mirror::String> name = dex_cache->GetResolvedString(field_id.name_idx_);
   if (resolve && name == nullptr) {
-    name = ResolveGetStringName(self, *dex_file, field_id.name_idx_, dex_cache);
+    name = ResolveGetStringName(self, field_id.name_idx_, dex_cache);
   }
   return name;
 }
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index bc728f4..dbba2b0 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -45,17 +45,17 @@
 
 ObjPtr<mirror::Class> ArtField::ProxyFindSystemClass(const char* descriptor) {
   DCHECK(GetDeclaringClass()->IsProxyClass());
-  return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(), descriptor);
+  ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupClass(
+      Thread::Current(), descriptor, /* class_loader */ nullptr);
+  DCHECK(klass != nullptr);
+  return klass;
 }
 
 ObjPtr<mirror::String> ArtField::ResolveGetStringName(Thread* self,
-                                                      const DexFile& dex_file,
                                                       dex::StringIndex string_idx,
                                                       ObjPtr<mirror::DexCache> dex_cache) {
   StackHandleScope<1> hs(self);
-  return Runtime::Current()->GetClassLinker()->ResolveString(dex_file,
-                                                             string_idx,
-                                                             hs.NewHandle(dex_cache));
+  return Runtime::Current()->GetClassLinker()->ResolveString(string_idx, hs.NewHandle(dex_cache));
 }
 
 std::string ArtField::PrettyField(ArtField* f, bool with_type) {
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 866bf0b..8d2f9ff 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -205,7 +205,7 @@
 
   bool IsPrimitiveType() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ObjPtr<mirror::Class> LookupType() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<mirror::Class> LookupResolvedType() REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::Class> ResolveType() REQUIRES_SHARED(Locks::mutator_lock_);
 
   size_t FieldSize() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -234,7 +234,6 @@
   ObjPtr<mirror::Class> ProxyFindSystemClass(const char* descriptor)
       REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::String> ResolveGetStringName(Thread* self,
-                                              const DexFile& dex_file,
                                               dex::StringIndex string_idx,
                                               ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 50913de..869394c 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -21,7 +21,6 @@
 
 #include "art_field.h"
 #include "base/callee_save_type.h"
-#include "base/logging.h"
 #include "class_linker-inl.h"
 #include "code_item_accessors-inl.h"
 #include "common_throws.h"
@@ -95,33 +94,28 @@
   return method_index_;
 }
 
+template <ReadBarrierOption kReadBarrierOption>
 inline uint32_t ArtMethod::GetDexMethodIndex() {
   if (kCheckDeclaringClassState) {
-    CHECK(IsRuntimeMethod() || GetDeclaringClass()->IsIdxLoaded() ||
-          GetDeclaringClass()->IsErroneous());
+    CHECK(IsRuntimeMethod() ||
+          GetDeclaringClass<kReadBarrierOption>()->IsIdxLoaded() ||
+          GetDeclaringClass<kReadBarrierOption>()->IsErroneous());
   }
   return GetDexMethodIndexUnchecked();
 }
 
 inline ObjPtr<mirror::Class> ArtMethod::LookupResolvedClassFromTypeIndex(dex::TypeIndex type_idx) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-  ObjPtr<mirror::DexCache> dex_cache = GetDexCache();
-  ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
-  if (UNLIKELY(type == nullptr)) {
-    type = Runtime::Current()->GetClassLinker()->LookupResolvedType(
-        *dex_cache->GetDexFile(), type_idx, dex_cache, GetClassLoader());
-  }
-  return type.Ptr();
+  ObjPtr<mirror::Class> type =
+      Runtime::Current()->GetClassLinker()->LookupResolvedType(type_idx, this);
+  DCHECK(!Thread::Current()->IsExceptionPending());
+  return type;
 }
 
 inline ObjPtr<mirror::Class> ArtMethod::ResolveClassFromTypeIndex(dex::TypeIndex type_idx) {
-  ObjPtr<mirror::DexCache> dex_cache = GetDexCache();
-  ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
-  if (UNLIKELY(type == nullptr)) {
-    type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
-    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
-  }
-  return type.Ptr();
+  ObjPtr<mirror::Class> type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
+  DCHECK_EQ(type == nullptr, Thread::Current()->IsExceptionPending());
+  return type;
 }
 
 inline bool ArtMethod::CheckIncompatibleClassChange(InvokeType type) {
@@ -202,7 +196,14 @@
 inline const char* ArtMethod::GetShorty(uint32_t* out_length) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
-  return dex_file->GetMethodShorty(dex_file->GetMethodId(GetDexMethodIndex()), out_length);
+  // Don't do a read barrier in the DCHECK() inside GetDexMethodIndex() as GetShorty()
+  // can be called when the declaring class is about to be unloaded and cannot be added
+  // to the mark stack (subsequent GC assertion would fail).
+  // It is safe to avoid the read barrier as the ArtMethod is constructed with a declaring
+  // Class already satisfying the DCHECK() inside GetDexMethodIndex(), so even if that copy
+  // of declaring class becomes a from-space object, it shall satisfy the DCHECK().
+  return dex_file->GetMethodShorty(dex_file->GetMethodId(GetDexMethodIndex<kWithoutReadBarrier>()),
+                                   out_length);
 }
 
 inline const Signature ArtMethod::GetSignature() {
@@ -297,9 +298,7 @@
 inline const char* ArtMethod::GetReturnTypeDescriptor() {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
-  const DexFile::MethodId& method_id = dex_file->GetMethodId(GetDexMethodIndex());
-  const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
-  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(proto_id.return_type_idx_));
+  return dex_file->GetTypeDescriptor(dex_file->GetTypeId(GetReturnTypeIndex()));
 }
 
 inline Primitive::Type ArtMethod::GetReturnTypePrimitive() {
@@ -319,7 +318,7 @@
 
 template <ReadBarrierOption kReadBarrierOption>
 inline mirror::DexCache* ArtMethod::GetDexCache() {
-  if (LIKELY(!IsObsolete())) {
+  if (LIKELY(!IsObsolete<kReadBarrierOption>())) {
     mirror::Class* klass = GetDeclaringClass<kReadBarrierOption>();
     return klass->GetDexCache<kDefaultVerifyFlags, kReadBarrierOption>();
   } else {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index fa0c501..9005120 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -134,15 +134,14 @@
   return dex_file->GetIndexForClassDef(*class_def);
 }
 
-mirror::String* ArtMethod::GetNameAsString(Thread* self) {
+ObjPtr<mirror::String> ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache()));
   auto* dex_file = dex_cache->GetDexFile();
   uint32_t dex_method_idx = GetDexMethodIndex();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
-  return Runtime::Current()->GetClassLinker()->ResolveString(*dex_file, method_id.name_idx_,
-                                                             dex_cache);
+  return Runtime::Current()->GetClassLinker()->ResolveString(method_id.name_idx_, dex_cache);
 }
 
 void ArtMethod::ThrowInvocationTimeError() {
@@ -550,8 +549,8 @@
   }
   auto* cl = Runtime::Current()->GetClassLinker();
   for (size_t i = 0; i < count; ++i) {
-    auto type_idx = proto_params->GetTypeItem(i).type_idx_;
-    auto* type = cl->ResolveType(type_idx, this);
+    dex::TypeIndex type_idx = proto_params->GetTypeItem(i).type_idx_;
+    ObjPtr<mirror::Class> type = cl->ResolveType(type_idx, this);
     if (type == nullptr) {
       Thread::Current()->AssertPendingException();
       return false;
@@ -587,11 +586,6 @@
   CHECK(existing_entry_point != nullptr) << PrettyMethod() << "@" << this;
   ClassLinker* class_linker = runtime->GetClassLinker();
 
-  if (class_linker->IsQuickGenericJniStub(existing_entry_point)) {
-    // The generic JNI does not have any method header.
-    return nullptr;
-  }
-
   if (existing_entry_point == GetQuickProxyInvokeHandler()) {
     DCHECK(IsProxyMethod() && !IsConstructor());
     // The proxy entry point does not have any method header.
@@ -599,7 +593,8 @@
   }
 
   // Check whether the current entry point contains this pc.
-  if (!class_linker->IsQuickResolutionStub(existing_entry_point) &&
+  if (!class_linker->IsQuickGenericJniStub(existing_entry_point) &&
+      !class_linker->IsQuickResolutionStub(existing_entry_point) &&
       !class_linker->IsQuickToInterpreterBridge(existing_entry_point)) {
     OatQuickMethodHeader* method_header =
         OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
@@ -632,19 +627,13 @@
   OatFile::OatMethod oat_method =
       FindOatMethodFor(this, class_linker->GetImagePointerSize(), &found);
   if (!found) {
-    if (class_linker->IsQuickResolutionStub(existing_entry_point)) {
-      // We are running the generic jni stub, but the entry point of the method has not
-      // been updated yet.
-      DCHECK_EQ(pc, 0u) << "Should be a downcall";
-      DCHECK(IsNative());
-      return nullptr;
-    }
-    if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
-      // We are running the generic jni stub, but the method is being instrumented.
-      // NB We would normally expect the pc to be zero but we can have non-zero pc's if
-      // instrumentation is installed or removed during the call which is using the generic jni
-      // trampoline.
-      DCHECK(IsNative());
+    if (IsNative()) {
+      // We are running the GenericJNI stub. The method's entrypoint may be
+      // any one of several stubs or a JIT-compiled JNI stub.
+      DCHECK(class_linker->IsQuickGenericJniStub(existing_entry_point) ||
+             class_linker->IsQuickResolutionStub(existing_entry_point) ||
+             existing_entry_point == GetQuickInstrumentationEntryPoint() ||
+             (jit != nullptr && jit->GetCodeCache()->ContainsPc(existing_entry_point)));
       return nullptr;
     }
     // Only for unit tests.
@@ -702,13 +691,15 @@
   declaring_class_ = GcRoot<mirror::Class>(const_cast<ArtMethod*>(src)->GetDeclaringClass());
 
   // If the entry point of the method we are copying from is from JIT code, we just
-  // put the entry point of the new method to interpreter. We could set the entry point
-  // to the JIT code, but this would require taking the JIT code cache lock to notify
-  // it, which we do not want at this level.
+  // put the entry point of the new method to interpreter or GenericJNI. We could set
+  // the entry point to the JIT code, but this would require taking the JIT code cache
+  // lock to notify it, which we do not want at this level.
   Runtime* runtime = Runtime::Current();
   if (runtime->UseJitCompilation()) {
     if (runtime->GetJit()->GetCodeCache()->ContainsPc(GetEntryPointFromQuickCompiledCode())) {
-      SetEntryPointFromQuickCompiledCodePtrSize(GetQuickToInterpreterBridge(), image_pointer_size);
+      SetEntryPointFromQuickCompiledCodePtrSize(
+          src->IsNative() ? GetQuickGenericJniStub() : GetQuickToInterpreterBridge(),
+          image_pointer_size);
     }
   }
   // Clear the profiling info for the same reasons as the JIT code.
diff --git a/runtime/art_method.h b/runtime/art_method.h
index dca6f37..f433223 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -19,11 +19,14 @@
 
 #include <cstddef>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/iteration_range.h"
-#include "base/logging.h"
+#include "base/macros.h"
+#include "base/runtime_debug.h"
 #include "dex_file.h"
 #include "dex_instruction_iterator.h"
 #include "gc_root.h"
@@ -242,8 +245,9 @@
     return (GetAccessFlags() & kAccDefault) != 0;
   }
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObsolete() {
-    return (GetAccessFlags() & kAccObsoleteMethod) != 0;
+    return (GetAccessFlags<kReadBarrierOption>() & kAccObsoleteMethod) != 0;
   }
 
   void SetIsObsolete() {
@@ -376,6 +380,7 @@
   ALWAYS_INLINE uint32_t GetDexMethodIndexUnchecked() {
     return dex_method_index_;
   }
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetDexMethodIndex() REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetDexMethodIndex(uint32_t new_idx) {
@@ -460,12 +465,11 @@
   }
 
   ProfilingInfo* GetProfilingInfo(PointerSize pointer_size) REQUIRES_SHARED(Locks::mutator_lock_) {
-    // Don't do a read barrier in the DCHECK, as GetProfilingInfo is called in places
-    // where the declaring class is treated as a weak reference (accessing it with
-    // a read barrier would either prevent unloading the class, or crash the runtime if
-    // the GC wants to unload it).
-    DCHECK(!IsNative<kWithoutReadBarrier>());
-    if (UNLIKELY(IsProxyMethod())) {
+    // Don't do a read barrier in the DCHECK() inside GetAccessFlags() called by IsNative(),
+    // as GetProfilingInfo is called in places where the declaring class is treated as a weak
+    // reference (accessing it with a read barrier would either prevent unloading the class,
+    // or crash the runtime if the GC wants to unload it).
+    if (UNLIKELY(IsNative<kWithoutReadBarrier>()) || UNLIKELY(IsProxyMethod())) {
       return nullptr;
     }
     return reinterpret_cast<ProfilingInfo*>(GetDataPtrSize(pointer_size));
@@ -574,7 +578,7 @@
 
   ALWAYS_INLINE const char* GetName() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::String* GetNameAsString(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<mirror::String> GetNameAsString(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
 
   const DexFile::CodeItem* GetCodeItem() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/atomic.h b/runtime/atomic.h
index d8621cc..ec3eb6d 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -22,8 +22,9 @@
 #include <limits>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
-#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 9bcda35..4329a5a 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -16,7 +16,9 @@
 
 #include "barrier.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/aborting.h"
 #include "base/mutex.h"
 #include "base/time_utils.h"
 #include "thread.h"
diff --git a/runtime/base/aborting.h b/runtime/base/aborting.h
new file mode 100644
index 0000000..8906c96
--- /dev/null
+++ b/runtime/base/aborting.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ABORTING_H_
+#define ART_RUNTIME_BASE_ABORTING_H_
+
+#include <atomic>
+
+namespace art {
+
+// 0 if not aborting, non-zero if an abort is in progress. Used on fatal exit to prevent recursive
+// aborts. Global declaration allows us to disable some error checking to ensure fatal shutdown
+// makes forward progress.
+extern std::atomic<unsigned int> gAborting;
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ABORTING_H_
diff --git a/runtime/base/allocator.cc b/runtime/base/allocator.cc
index bb00638..2da88c3 100644
--- a/runtime/base/allocator.cc
+++ b/runtime/base/allocator.cc
@@ -19,8 +19,9 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+#include <android-base/logging.h>
+
 #include "atomic.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 2e35f8a..cc413c5 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -23,7 +23,8 @@
 #include <iomanip>
 #include <numeric>
 
-#include "logging.h"
+#include <android-base/logging.h>
+
 #include "mem_map.h"
 #include "mutex.h"
 #include "systrace.h"
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index a327cb0..9e03658 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -20,11 +20,11 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#include "base/bit_utils.h"
-#include "base/dchecked_vector.h"
-#include "base/memory_tool.h"
+#include "bit_utils.h"
+#include "dchecked_vector.h"
 #include "debug_stack.h"
 #include "macros.h"
+#include "memory_tool.h"
 #include "mutex.h"
 
 namespace art {
diff --git a/runtime/base/arena_object.h b/runtime/base/arena_object.h
index ed00bab..06884c2 100644
--- a/runtime/base/arena_object.h
+++ b/runtime/base/arena_object.h
@@ -17,8 +17,10 @@
 #ifndef ART_RUNTIME_BASE_ARENA_OBJECT_H_
 #define ART_RUNTIME_BASE_ARENA_OBJECT_H_
 
-#include "base/arena_allocator.h"
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "arena_allocator.h"
+#include "macros.h"
 #include "scoped_arena_allocator.h"
 
 namespace art {
diff --git a/runtime/base/array_ref.h b/runtime/base/array_ref.h
index 630a036..ef86512 100644
--- a/runtime/base/array_ref.h
+++ b/runtime/base/array_ref.h
@@ -20,7 +20,7 @@
 #include <type_traits>
 #include <vector>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/base/bit_field.h b/runtime/base/bit_field.h
index a80ca28..86007d6 100644
--- a/runtime/base/bit_field.h
+++ b/runtime/base/bit_field.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_BASE_BIT_FIELD_H_
 #define ART_RUNTIME_BASE_BIT_FIELD_H_
 
+#include <android-base/logging.h>
+
 #include "globals.h"
-#include "logging.h"
 
 namespace art {
 
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 5d83654..34cddbf 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -20,7 +20,8 @@
 #include <limits>
 #include <type_traits>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/stl_util_identity.h"
 
 namespace art {
diff --git a/runtime/base/bit_utils_iterator.h b/runtime/base/bit_utils_iterator.h
index 8514de6..2d3d050 100644
--- a/runtime/base/bit_utils_iterator.h
+++ b/runtime/base/bit_utils_iterator.h
@@ -21,9 +21,10 @@
 #include <limits>
 #include <type_traits>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
 #include "base/iteration_range.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 
 namespace art {
diff --git a/runtime/base/bit_vector-inl.h b/runtime/base/bit_vector-inl.h
index 0e67f77..e67d4e2 100644
--- a/runtime/base/bit_vector-inl.h
+++ b/runtime/base/bit_vector-inl.h
@@ -17,9 +17,11 @@
 #ifndef ART_RUNTIME_BASE_BIT_VECTOR_INL_H_
 #define ART_RUNTIME_BASE_BIT_VECTOR_INL_H_
 
-#include "base/bit_utils.h"
 #include "bit_vector.h"
-#include "logging.h"
+
+#include <android-base/logging.h>
+
+#include "base/bit_utils.h"
 
 namespace art {
 
diff --git a/runtime/base/bounded_fifo.h b/runtime/base/bounded_fifo.h
index 7bcd382..1520770 100644
--- a/runtime/base/bounded_fifo.h
+++ b/runtime/base/bounded_fifo.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_BASE_BOUNDED_FIFO_H_
 #define ART_RUNTIME_BASE_BOUNDED_FIFO_H_
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index 92c493a..ac1a10c 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -24,8 +24,7 @@
 #include <limits>
 #include <type_traits>
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
index 77f0ea2..7236ac3 100644
--- a/runtime/base/dchecked_vector.h
+++ b/runtime/base/dchecked_vector.h
@@ -21,7 +21,7 @@
 #include <type_traits>
 #include <vector>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/base/debug_stack.h b/runtime/base/debug_stack.h
index 886065d..1331e10 100644
--- a/runtime/base/debug_stack.h
+++ b/runtime/base/debug_stack.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_BASE_DEBUG_STACK_H_
 #define ART_RUNTIME_BASE_DEBUG_STACK_H_
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+
 #include "globals.h"
 
 namespace art {
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
index dffb9b4..ac2e184 100644
--- a/runtime/base/file_magic.cc
+++ b/runtime/base/file_magic.cc
@@ -20,9 +20,9 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
-#include "base/logging.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file.h"
 
diff --git a/runtime/base/file_utils.cc b/runtime/base/file_utils.cc
index 323a065..db49860 100644
--- a/runtime/base/file_utils.cc
+++ b/runtime/base/file_utils.cc
@@ -89,7 +89,7 @@
   }
 }
 
-bool PrintFileToLog(const std::string& file_name, LogSeverity level) {
+bool PrintFileToLog(const std::string& file_name, android::base::LogSeverity level) {
   File file(file_name, O_RDONLY, false);
   if (!file.IsOpened()) {
     return false;
diff --git a/runtime/base/file_utils.h b/runtime/base/file_utils.h
index 007f3b4..e4555ad 100644
--- a/runtime/base/file_utils.h
+++ b/runtime/base/file_utils.h
@@ -21,13 +21,14 @@
 
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
-#include "base/logging.h"
 
 namespace art {
 
 bool ReadFileToString(const std::string& file_name, std::string* result);
-bool PrintFileToLog(const std::string& file_name, LogSeverity level);
+bool PrintFileToLog(const std::string& file_name, android::base::LogSeverity level);
 
 // Find $ANDROID_ROOT, /system, or abort.
 std::string GetAndroidRoot();
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index c743342..47e6d93 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -25,8 +25,10 @@
 #include <type_traits>
 #include <utility>
 
+#include <android-base/logging.h>
+
 #include "bit_utils.h"
-#include "logging.h"
+#include "macros.h"
 
 namespace art {
 
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index be20920..3ce0140 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -24,6 +24,8 @@
 
 #include "histogram.h"
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
 #include "base/time_utils.h"
 #include "utils.h"
diff --git a/runtime/base/histogram.h b/runtime/base/histogram.h
index e0c921e..7544a9c 100644
--- a/runtime/base/histogram.h
+++ b/runtime/base/histogram.h
@@ -19,7 +19,7 @@
 #include <string>
 #include <vector>
 
-#include "base/logging.h"
+#include <android-base/macros.h>
 
 namespace art {
 
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 4776357..90eb74c 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -20,7 +20,8 @@
 #include <limits>
 #include <sstream>
 
-#include "base/mutex.h"
+#include "aborting.h"
+#include "mutex.h"
 #include "thread-current-inl.h"
 #include "utils.h"
 
@@ -34,55 +35,6 @@
 
 namespace art {
 
-// We test here that the runtime-debug-checks are actually a no-op constexpr false in release
-// builds, as we can't check that in gtests (which are always debug).
-
-#ifdef NDEBUG
-namespace {
-DECLARE_RUNTIME_DEBUG_FLAG(kTestForConstexpr);
-static_assert(!kTestForConstexpr, "Issue with DECLARE_RUNTIME_DEBUG_FLAG in NDEBUG.");
-}
-#endif
-
-// Implementation of runtime debug flags. This should be compile-time optimized away in release
-// builds.
-namespace {
-bool gSlowEnabled = false;  // Default for slow flags is "off."
-
-// Use a function with a static to ensure our vector storage doesn't have initialization order
-// issues.
-std::vector<bool*>& GetFlagPtrs() {
-  static std::vector<bool*> g_flag_ptrs;
-  return g_flag_ptrs;
-}
-
-bool RegisterRuntimeDebugFlagImpl(bool* flag_ptr) {
-  GetFlagPtrs().push_back(flag_ptr);
-  return gSlowEnabled;
-}
-
-void SetRuntimeDebugFlagsEnabledImpl(bool enabled) {
-  gSlowEnabled = enabled;
-  for (bool* flag_ptr : GetFlagPtrs()) {
-    *flag_ptr = enabled;
-  }
-}
-
-}  // namespace
-
-bool RegisterRuntimeDebugFlag(bool* flag_ptr) {
-  if (kIsDebugBuild) {
-    return RegisterRuntimeDebugFlagImpl(flag_ptr);
-  }
-  return false;
-}
-
-void SetRuntimeDebugFlagsEnabled(bool enabled) {
-  if (kIsDebugBuild) {
-    SetRuntimeDebugFlagsEnabledImpl(enabled);
-  }
-}
-
 LogVerbosity gLogVerbosity;
 
 std::atomic<unsigned int> gAborting(0);
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 15f9353..c562bdf 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -53,6 +53,7 @@
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
   bool verifier;
+  bool verifier_debug;   // Only works in debug builds.
   bool image;
   bool systrace_lock_logging;  // Enabled with "-verbose:sys-locks".
   bool agents;
@@ -62,48 +63,6 @@
 // Global log verbosity setting, initialized by InitLogging.
 extern LogVerbosity gLogVerbosity;
 
-// Runtime debug flags are flags that have a runtime component, that is, their value can be changed.
-// This is meant to implement fast vs slow debug builds, in that certain debug flags can be turned
-// on and off. To that effect, expose two macros to help implement and globally drive these flags:
-//
-// In the header, declare a (class) flag like this:
-//
-//   class C {
-//     DECLARE_RUNTIME_DEBUG_FLAG(kFlag);
-//   };
-//
-// This will declare a flag kFlag that is a constexpr false in release builds, and a static field
-// in debug builds. Usage is than uniform as C::kFlag.
-//
-// In the cc file, define the flag like this:
-//
-//   DEFINE_RUNTIME_DEBUG_FLAG(C, kFlag);
-//
-// This will define the static storage, as necessary, and register the flag with the runtime
-// infrastructure to toggle the value.
-
-#ifdef NDEBUG
-#define DECLARE_RUNTIME_DEBUG_FLAG(x) \
-  static constexpr bool x = false;
-// Note: the static_assert in the following only works for public flags. Fix this when we cross
-//       the line at some point.
-#define DEFINE_RUNTIME_DEBUG_FLAG(C, x) \
-  static_assert(!C::x, "Unexpected enabled flag in release build");
-#else
-#define DECLARE_RUNTIME_DEBUG_FLAG(x) \
-  static bool x;
-#define DEFINE_RUNTIME_DEBUG_FLAG(C, x) \
-  bool C::x = RegisterRuntimeDebugFlag(&C::x);
-#endif  // NDEBUG
-
-bool RegisterRuntimeDebugFlag(bool* runtime_debug_flag);
-void SetRuntimeDebugFlagsEnabled(bool enabled);
-
-// 0 if not abort, non-zero if an abort is in progress. Used on fatal exit to prevents recursive
-// aborts. Global declaration allows us to disable some error checking to ensure fatal shutdown
-// makes forward progress.
-extern std::atomic<unsigned int> gAborting;
-
 // Configure logging based on ANDROID_LOG_TAGS environment variable.
 // We need to parse a string that looks like
 //
diff --git a/runtime/base/logging_test.cc b/runtime/base/logging_test.cc
index d380b9e..404e080 100644
--- a/runtime/base/logging_test.cc
+++ b/runtime/base/logging_test.cc
@@ -22,6 +22,7 @@
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "common_runtime_test.h"
+#include "runtime_debug.h"
 
 namespace art {
 
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index 6cd7d60..512e5ce 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -59,6 +59,10 @@
 #define QUOTE(x) #x
 #define STRINGIFY(x) QUOTE(x)
 
+// Append tokens after evaluating.
+#define APPEND_TOKENS_AFTER_EVAL_2(a, b) a ## b
+#define APPEND_TOKENS_AFTER_EVAL(a, b) APPEND_TOKENS_AFTER_EVAL_2(a, b)
+
 #ifndef NDEBUG
 #define ALWAYS_INLINE
 #else
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index c0cf487..7077298 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -24,8 +24,10 @@
 #include <iosfwd>
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "atomic.h"
-#include "base/logging.h"
+#include "base/aborting.h"
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/base/runtime_debug.cc b/runtime/base/runtime_debug.cc
new file mode 100644
index 0000000..4f8a8ec
--- /dev/null
+++ b/runtime/base/runtime_debug.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "runtime_debug.h"
+
+#include <vector>
+
+#include "globals.h"
+
+namespace art {
+
+// We test here that the runtime-debug-checks are actually a no-op constexpr false in release
+// builds, as we can't check that in gtests (which are always debug).
+
+#ifdef NDEBUG
+namespace {
+DECLARE_RUNTIME_DEBUG_FLAG(kTestForConstexpr);
+static_assert(!kTestForConstexpr, "Issue with DECLARE_RUNTIME_DEBUG_FLAG in NDEBUG.");
+}
+#endif
+
+// Implementation of runtime debug flags. This should be compile-time optimized away in release
+// builds.
+namespace {
+bool gSlowEnabled = false;  // Default for slow flags is "off."
+
+// Use a function with a static to ensure our vector storage doesn't have initialization order
+// issues.
+std::vector<bool*>& GetFlagPtrs() {
+  static std::vector<bool*> g_flag_ptrs;
+  return g_flag_ptrs;
+}
+
+bool RegisterRuntimeDebugFlagImpl(bool* flag_ptr) {
+  GetFlagPtrs().push_back(flag_ptr);
+  return gSlowEnabled;
+}
+
+void SetRuntimeDebugFlagsEnabledImpl(bool enabled) {
+  gSlowEnabled = enabled;
+  for (bool* flag_ptr : GetFlagPtrs()) {
+    *flag_ptr = enabled;
+  }
+}
+
+}  // namespace
+
+bool RegisterRuntimeDebugFlag(bool* flag_ptr) {
+  if (kIsDebugBuild) {
+    return RegisterRuntimeDebugFlagImpl(flag_ptr);
+  }
+  return false;
+}
+
+void SetRuntimeDebugFlagsEnabled(bool enabled) {
+  if (kIsDebugBuild) {
+    SetRuntimeDebugFlagsEnabledImpl(enabled);
+  }
+}
+
+}  // namespace art
diff --git a/runtime/base/runtime_debug.h b/runtime/base/runtime_debug.h
new file mode 100644
index 0000000..89a0361
--- /dev/null
+++ b/runtime/base/runtime_debug.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_RUNTIME_DEBUG_H_
+#define ART_RUNTIME_BASE_RUNTIME_DEBUG_H_
+
+namespace art {
+
+// Runtime debug flags are flags that have a runtime component, that is, their value can be changed.
+// This is meant to implement fast vs slow debug builds, in that certain debug flags can be turned
+// on and off. To that effect, expose two macros to help implement and globally drive these flags:
+//
+// In the header, declare a (class) flag like this:
+//
+//   class C {
+//     DECLARE_RUNTIME_DEBUG_FLAG(kFlag);
+//   };
+//
+// This will declare a flag kFlag that is a constexpr false in release builds, and a static field
+// in debug builds. Usage is then uniform as C::kFlag.
+//
+// In the cc file, define the flag like this:
+//
+//   DEFINE_RUNTIME_DEBUG_FLAG(C, kFlag);
+//
+// This will define the static storage, as necessary, and register the flag with the runtime
+// infrastructure to toggle the value.
+
+#ifdef NDEBUG
+#define DECLARE_RUNTIME_DEBUG_FLAG(x) \
+  static constexpr bool x = false;
+// Note: the static_assert in the following only works for public flags. Fix this when we cross
+//       the line at some point.
+#define DEFINE_RUNTIME_DEBUG_FLAG(C, x) \
+  static_assert(!C::x, "Unexpected enabled flag in release build");
+#else
+#define DECLARE_RUNTIME_DEBUG_FLAG(x) \
+  static bool x;
+#define DEFINE_RUNTIME_DEBUG_FLAG(C, x) \
+  bool C::x = RegisterRuntimeDebugFlag(&C::x);
+#endif  // NDEBUG
+
+bool RegisterRuntimeDebugFlag(bool* runtime_debug_flag);
+void SetRuntimeDebugFlagsEnabled(bool enabled);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_RUNTIME_DEBUG_H_
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index 8f50fd4..35e337f 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -17,10 +17,11 @@
 #ifndef ART_RUNTIME_BASE_SCOPED_ARENA_ALLOCATOR_H_
 #define ART_RUNTIME_BASE_SCOPED_ARENA_ALLOCATOR_H_
 
+#include <android-base/logging.h>
+
 #include "arena_allocator.h"
 #include "debug_stack.h"
 #include "globals.h"
-#include "logging.h"
 #include "macros.h"
 
 namespace art {
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index b8df689..514b97b 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -19,9 +19,9 @@
 #include <sys/file.h>
 #include <sys/stat.h>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
-#include "base/logging.h"
 #include "base/unix_file/fd_file.h"
 
 namespace art {
diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h
index 1b933c0..db6c819 100644
--- a/runtime/base/scoped_flock.h
+++ b/runtime/base/scoped_flock.h
@@ -20,9 +20,8 @@
 #include <memory>
 #include <string>
 
-#include "android-base/unique_fd.h"
+#include <android-base/unique_fd.h>
 
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/unix_file/fd_file.h"
 #include "os.h"
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index b272972..02f3765 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -21,7 +21,7 @@
 #include <set>
 #include <sstream>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/base/stringpiece.cc b/runtime/base/stringpiece.cc
index 2570bad..672431c 100644
--- a/runtime/base/stringpiece.cc
+++ b/runtime/base/stringpiece.cc
@@ -19,7 +19,7 @@
 #include <ostream>
 #include <utility>
 
-#include "logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/base/systrace.h b/runtime/base/systrace.h
index 06db48a..dc2206e 100644
--- a/runtime/base/systrace.h
+++ b/runtime/base/systrace.h
@@ -19,10 +19,12 @@
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <cutils/trace.h>
-#include <utils/Trace.h>
 
+#include <sstream>
 #include <string>
 
+#include "android-base/stringprintf.h"
+
 namespace art {
 
 class ScopedTrace {
@@ -30,6 +32,12 @@
   explicit ScopedTrace(const char* name) {
     ATRACE_BEGIN(name);
   }
+  template <typename Fn>
+  explicit ScopedTrace(Fn fn) {
+    if (ATRACE_ENABLED()) {
+      ATRACE_BEGIN(fn().c_str());
+    }
+  }
 
   explicit ScopedTrace(const std::string& name) : ScopedTrace(name.c_str()) {}
 
@@ -38,6 +46,50 @@
   }
 };
 
+// Helper for the SCOPED_TRACE macro. Do not use directly.
+// The constructor does nothing; the destructor calls ATRACE_END(). The matching ATRACE_BEGIN()
+// is issued by ScopedTraceMessageHelper once the message has been streamed.
+class ScopedTraceNoStart {
+ public:
+  ScopedTraceNoStart() {
+  }
+
+  ~ScopedTraceNoStart() {
+    ATRACE_END();
+  }
+
+  // Message helper for the macro. Do not use directly.
+  // Collects the streamed message and calls ATRACE_BEGIN() with it on destruction.
+  class ScopedTraceMessageHelper {
+   public:
+    ScopedTraceMessageHelper() {
+    }
+    ~ScopedTraceMessageHelper() {
+      ATRACE_BEGIN(buffer_.str().c_str());
+    }
+
+    std::ostream& stream() {
+      return buffer_;
+    }
+
+   private:
+    std::ostringstream buffer_;
+  };
+};
+
+// Two-level token pasting: the operands of ## are not macro-expanded, so `trace ## __LINE__`
+// would always produce the identifier trace__LINE__. Going through a second macro expands
+// __LINE__ to the line number first.
+#define ART_SCOPED_TRACE_CONCAT_2(a, b) a ## b
+#define ART_SCOPED_TRACE_CONCAT(a, b) ART_SCOPED_TRACE_CONCAT_2(a, b)
+
+// Usage: SCOPED_TRACE << "message" << value;
+// Declares a ScopedTraceNoStart named after the current line, then, only if ATRACE_ENABLED(),
+// evaluates the streamed message and begins the trace section with it.
+#define SCOPED_TRACE \
+  ::art::ScopedTraceNoStart ART_SCOPED_TRACE_CONCAT(trace, __LINE__); \
+  (ATRACE_ENABLED()) && ::art::ScopedTraceNoStart::ScopedTraceMessageHelper().stream()
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_SYSTRACE_H_
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index b8d6931..23ec3e1 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -18,8 +18,9 @@
 
 #include "timing_logger.h"
 
+#include <android-base/logging.h>
+
 #include "base/histogram-inl.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 792c581..37f239d 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -23,7 +23,7 @@
 
 #include <limits>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 // Includes needed for FdFile::Copy().
 #ifdef __linux__
diff --git a/runtime/cdex/compact_dex_level.h b/runtime/cdex/compact_dex_level.h
index b824462..5aec001 100644
--- a/runtime/cdex/compact_dex_level.h
+++ b/runtime/cdex/compact_dex_level.h
@@ -17,6 +17,9 @@
 #ifndef ART_RUNTIME_CDEX_COMPACT_DEX_LEVEL_H_
 #define ART_RUNTIME_CDEX_COMPACT_DEX_LEVEL_H_
 
+#include <string>
+
+#include "base/macros.h"
 #include "dex_file.h"
 
 namespace art {
@@ -29,6 +32,19 @@
   kCompactDexLevelFast,
 };
 
+#ifndef ART_DEFAULT_COMPACT_DEX_LEVEL
+#error ART_DEFAULT_COMPACT_DEX_LEVEL not specified.
+#else
+#define ART_DEFAULT_COMPACT_DEX_LEVEL_VALUE_fast CompactDexLevel::kCompactDexLevelFast
+#define ART_DEFAULT_COMPACT_DEX_LEVEL_VALUE_none CompactDexLevel::kCompactDexLevelNone
+
+#define ART_DEFAULT_COMPACT_DEX_LEVEL_DEFAULT APPEND_TOKENS_AFTER_EVAL( \
+    ART_DEFAULT_COMPACT_DEX_LEVEL_VALUE_, \
+    ART_DEFAULT_COMPACT_DEX_LEVEL)
+
+static constexpr CompactDexLevel kDefaultCompactDexLevel = ART_DEFAULT_COMPACT_DEX_LEVEL_DEFAULT;
+#endif
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CDEX_COMPACT_DEX_LEVEL_H_
diff --git a/runtime/cha.cc b/runtime/cha.cc
index 6c011e8..a53d7e5 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -17,6 +17,7 @@
 #include "cha.h"
 
 #include "art_method-inl.h"
+#include "base/logging.h"  // For VLOG
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "linear_alloc.h"
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index c3dd702..90f478f 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -21,11 +21,12 @@
 
 #include <iomanip>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "base/to_str.h"
 #include "class_linker-inl.h"
 #include "class_linker.h"
@@ -372,7 +373,7 @@
     if (f == nullptr) {
       return false;
     }
-    if (c != f->GetDeclaringClass()) {
+    if (!f->GetDeclaringClass()->IsAssignableFrom(c)) {
       AbortF("static jfieldID %p not valid for class %s", fid,
              mirror::Class::PrettyClass(c).c_str());
       return false;
@@ -709,7 +710,7 @@
         return false;
       }
       ObjPtr<mirror::Class> c = o->AsClass();
-      if (c != field->GetDeclaringClass()) {
+      if (!field->GetDeclaringClass()->IsAssignableFrom(c)) {
         AbortF("attempt to access static field %s with an incompatible class argument of %s: %p",
                field->PrettyField().c_str(), mirror::Class::PrettyDescriptor(c).c_str(), fid);
         return false;
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index d6f0030..4b317f8 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -61,35 +61,96 @@
   return array_class.Ptr();
 }
 
+inline ObjPtr<mirror::Class> ClassLinker::ResolveType(dex::TypeIndex type_idx,
+                                                      ObjPtr<mirror::Class> referrer) {
+  if (kObjPtrPoisoning) {
+    StackHandleScope<1> hs(Thread::Current());
+    HandleWrapperObjPtr<mirror::Class> referrer_wrapper = hs.NewHandleWrapper(&referrer);
+    Thread::Current()->PoisonObjectPointers();
+  }
+  if (kIsDebugBuild) {
+    Thread::Current()->AssertNoPendingException();
+  }
+  // We do not need the read barrier for getting the DexCache for the initial resolved type
+  // lookup as both from-space and to-space copies point to the same native resolved types array.
+  ObjPtr<mirror::Class> resolved_type =
+      referrer->GetDexCache<kDefaultVerifyFlags, kWithoutReadBarrier>()->GetResolvedType(type_idx);
+  if (resolved_type == nullptr) {
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(referrer->GetDexCache()));
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
+    resolved_type = DoResolveType(type_idx, h_dex_cache, class_loader);
+  }
+  return resolved_type;
+}
+
+inline ObjPtr<mirror::Class> ClassLinker::ResolveType(dex::TypeIndex type_idx,
+                                                      ArtMethod* referrer) {
+  Thread::PoisonObjectPointersIfDebug();
+  if (kIsDebugBuild) {
+    Thread::Current()->AssertNoPendingException();
+  }
+  // We do not need the read barrier for getting the DexCache for the initial resolved type
+  // lookup as both from-space and to-space copies point to the same native resolved types array.
+  ObjPtr<mirror::Class> resolved_type =
+      referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedType(type_idx);
+  if (UNLIKELY(resolved_type == nullptr)) {
+    StackHandleScope<2> hs(Thread::Current());
+    ObjPtr<mirror::Class> referring_class = referrer->GetDeclaringClass();
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referring_class->GetClassLoader()));
+    resolved_type = DoResolveType(type_idx, dex_cache, class_loader);
+  }
+  return resolved_type;
+}
+
+inline ObjPtr<mirror::Class> ClassLinker::ResolveType(dex::TypeIndex type_idx,
+                                                      Handle<mirror::DexCache> dex_cache,
+                                                      Handle<mirror::ClassLoader> class_loader) {
+  DCHECK(dex_cache != nullptr);
+  Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::Class> resolved = dex_cache->GetResolvedType(type_idx);
+  if (resolved == nullptr) {
+    resolved = DoResolveType(type_idx, dex_cache, class_loader);
+  }
+  return resolved;
+}
+
+inline ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(dex::TypeIndex type_idx,
+                                                             ObjPtr<mirror::Class> referrer) {
+  // We do not need the read barrier for getting the DexCache for the initial resolved type
+  // lookup as both from-space and to-space copies point to the same native resolved types array.
+  ObjPtr<mirror::Class> type =
+      referrer->GetDexCache<kDefaultVerifyFlags, kWithoutReadBarrier>()->GetResolvedType(type_idx);
+  if (type == nullptr) {
+    type = DoLookupResolvedType(type_idx, referrer->GetDexCache(), referrer->GetClassLoader());
+  }
+  return type;
+}
+
+inline ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(dex::TypeIndex type_idx,
+                                                             ArtMethod* referrer) {
+  // We do not need the read barrier for getting the DexCache for the initial resolved type
+  // lookup as both from-space and to-space copies point to the same native resolved types array.
+  ObjPtr<mirror::Class> type =
+      referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedType(type_idx);
+  if (type == nullptr) {
+    type = DoLookupResolvedType(type_idx, referrer->GetDexCache(), referrer->GetClassLoader());
+  }
+  return type;
+}
+
 inline ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(
     dex::TypeIndex type_idx,
     ObjPtr<mirror::DexCache> dex_cache,
     ObjPtr<mirror::ClassLoader> class_loader) {
   ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
   if (type == nullptr) {
-    type = Runtime::Current()->GetClassLinker()->LookupResolvedType(
-        *dex_cache->GetDexFile(), type_idx, dex_cache, class_loader);
+    type = DoLookupResolvedType(type_idx, dex_cache, class_loader);
   }
   return type;
 }
 
-inline mirror::Class* ClassLinker::ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer) {
-  Thread::PoisonObjectPointersIfDebug();
-  if (kIsDebugBuild) {
-    Thread::Current()->AssertNoPendingException();
-  }
-  ObjPtr<mirror::Class> resolved_type = referrer->GetDexCache()->GetResolvedType(type_idx);
-  if (UNLIKELY(resolved_type == nullptr)) {
-    StackHandleScope<2> hs(Thread::Current());
-    ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
-  }
-  return resolved_type.Ptr();
-}
-
 template <bool kThrowOnError, typename ClassGetter>
 inline bool ClassLinker::CheckInvokeClassMismatch(ObjPtr<mirror::DexCache> dex_cache,
                                                   InvokeType type,
@@ -147,10 +208,9 @@
       dex_cache,
       type,
       [this, dex_cache, method_idx, class_loader]() REQUIRES_SHARED(Locks::mutator_lock_) {
-        const DexFile& dex_file = *dex_cache->GetDexFile();
-        const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
+        const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(method_idx);
         ObjPtr<mirror::Class> klass =
-            LookupResolvedType(dex_file, method_id.class_idx_, dex_cache, class_loader);
+            LookupResolvedType(method_id.class_idx_, dex_cache, class_loader);
         DCHECK(klass != nullptr);
         return klass;
       });
@@ -186,6 +246,8 @@
   // lookup in the context of the original method from where it steals the code.
   // However, we delay the GetInterfaceMethodIfProxy() until needed.
   DCHECK(!referrer->IsProxyMethod() || referrer->IsConstructor());
+  // We do not need the read barrier for getting the DexCache for the initial resolved method
+  // lookup as both from-space and to-space copies point to the same native resolved methods array.
   ArtMethod* resolved_method = referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedMethod(
       method_idx, image_pointer_size_);
   if (resolved_method == nullptr) {
@@ -227,6 +289,8 @@
   // However, we delay the GetInterfaceMethodIfProxy() until needed.
   DCHECK(!referrer->IsProxyMethod() || referrer->IsConstructor());
   Thread::PoisonObjectPointersIfDebug();
+  // We do not need the read barrier for getting the DexCache for the initial resolved method
+  // lookup as both from-space and to-space copies point to the same native resolved methods array.
   ArtMethod* resolved_method = referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedMethod(
       method_idx, image_pointer_size_);
   DCHECK(resolved_method == nullptr || !resolved_method->IsRuntimeMethod());
@@ -236,9 +300,7 @@
     StackHandleScope<2> hs(self);
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(referrer->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    const DexFile* dex_file = h_dex_cache->GetDexFile();
-    resolved_method = ResolveMethod<kResolveMode>(*dex_file,
-                                                  method_idx,
+    resolved_method = ResolveMethod<kResolveMode>(method_idx,
                                                   h_dex_cache,
                                                   h_class_loader,
                                                   referrer,
@@ -279,10 +341,13 @@
 inline ArtField* ClassLinker::LookupResolvedField(uint32_t field_idx,
                                                   ArtMethod* referrer,
                                                   bool is_static) {
-  ObjPtr<mirror::DexCache> dex_cache = referrer->GetDexCache();
-  ArtField* field = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
+  // We do not need the read barrier for getting the DexCache for the initial resolved field
+  // lookup as both from-space and to-space copies point to the same native resolved fields array.
+  ArtField* field = referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedField(
+      field_idx, image_pointer_size_);
   if (field == nullptr) {
-    field = LookupResolvedField(field_idx, dex_cache, referrer->GetClassLoader(), is_static);
+    ObjPtr<mirror::ClassLoader> class_loader = referrer->GetDeclaringClass()->GetClassLoader();
+    field = LookupResolvedField(field_idx, referrer->GetDexCache(), class_loader, is_static);
   }
   return field;
 }
@@ -291,15 +356,16 @@
                                            ArtMethod* referrer,
                                            bool is_static) {
   Thread::PoisonObjectPointersIfDebug();
-  ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-  ArtField* resolved_field =
-      referrer->GetDexCache()->GetResolvedField(field_idx, image_pointer_size_);
+  // We do not need the read barrier for getting the DexCache for the initial resolved field
+  // lookup as both from-space and to-space copies point to the same native resolved fields array.
+  ArtField* resolved_field = referrer->GetDexCache<kWithoutReadBarrier>()->GetResolvedField(
+      field_idx, image_pointer_size_);
   if (UNLIKELY(resolved_field == nullptr)) {
     StackHandleScope<2> hs(Thread::Current());
+    ObjPtr<mirror::Class> referring_class = referrer->GetDeclaringClass();
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
+    Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referring_class->GetClassLoader()));
+    resolved_field = ResolveField(field_idx, dex_cache, class_loader, is_static);
     // Note: We cannot check here to see whether we added the field to the cache. The type
     //       might be an erroneous class, which results in it being hidden from us.
   }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 38dd761..55fa632 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4515,7 +4515,7 @@
 
 void ClassLinker::CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out) {
   // Create constructor for Proxy that must initialize the method.
-  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 23u);
+  CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 21u);
 
   // Find the <init>(InvocationHandler)V method. The exact method offset varies depending
   // on which front-end compiler was used to build the libcore DEX files.
@@ -4818,7 +4818,6 @@
   if (num_static_fields > 0) {
     const DexFile::ClassDef* dex_class_def = klass->GetClassDef();
     CHECK(dex_class_def != nullptr);
-    const DexFile& dex_file = klass->GetDexFile();
     StackHandleScope<3> hs(self);
     Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
@@ -4836,11 +4835,11 @@
       }
     }
 
-    annotations::RuntimeEncodedStaticFieldValueIterator value_it(dex_file,
-                                                                 &dex_cache,
-                                                                 &class_loader,
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(dex_cache,
+                                                                 class_loader,
                                                                  this,
                                                                  *dex_class_def);
+    const DexFile& dex_file = *dex_cache->GetDexFile();
     const uint8_t* class_data = dex_file.GetClassData(*dex_class_def);
     ClassDataItemIterator field_it(dex_file, class_data);
     if (value_it.HasNext()) {
@@ -4848,7 +4847,7 @@
       CHECK(can_init_statics);
       for ( ; value_it.HasNext(); value_it.Next(), field_it.Next()) {
         ArtField* field = ResolveField(
-            dex_file, field_it.GetMemberIndex(), dex_cache, class_loader, true);
+            field_it.GetMemberIndex(), dex_cache, class_loader, /* is_static */ true);
         if (Runtime::Current()->IsActiveTransaction()) {
           value_it.ReadValueToField<true>(field);
         } else {
@@ -5467,7 +5466,7 @@
       return false;
     }
 
-    ObjPtr<mirror::Class> super_class = ResolveType(dex_file, super_class_idx, klass.Get());
+    ObjPtr<mirror::Class> super_class = ResolveType(super_class_idx, klass.Get());
     if (super_class == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
       return false;
@@ -5486,7 +5485,7 @@
   if (interfaces != nullptr) {
     for (size_t i = 0; i < interfaces->Size(); i++) {
       dex::TypeIndex idx = interfaces->GetTypeItem(i).type_idx_;
-      ObjPtr<mirror::Class> interface = ResolveType(dex_file, idx, klass.Get());
+      ObjPtr<mirror::Class> interface = ResolveType(idx, klass.Get());
       if (interface == nullptr) {
         DCHECK(Thread::Current()->IsExceptionPending());
         return false;
@@ -5523,6 +5522,10 @@
     return false;
   }
   // Verify
+  if (klass->IsInterface() && super != GetClassRoot(kJavaLangObject)) {
+    ThrowClassFormatError(klass.Get(), "Interfaces must have java.lang.Object as superclass");
+    return false;
+  }
   if (super->IsFinal()) {
     ThrowVerifyError(klass.Get(),
                      "Superclass %s of %s is declared final",
@@ -7723,32 +7726,32 @@
   klass->SetReferenceInstanceOffsets(reference_offsets);
 }
 
-mirror::String* ClassLinker::ResolveString(const DexFile& dex_file,
-                                           dex::StringIndex string_idx,
-                                           Handle<mirror::DexCache> dex_cache) {
+ObjPtr<mirror::String> ClassLinker::ResolveString(dex::StringIndex string_idx,
+                                                  Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache != nullptr);
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != nullptr) {
-    return resolved.Ptr();
+    return resolved;
   }
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
   ObjPtr<mirror::String> string = intern_table_->InternStrong(utf16_length, utf8_data);
   if (string != nullptr) {
     dex_cache->SetResolvedString(string_idx, string);
   }
-  return string.Ptr();
+  return string;
 }
 
-mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
-                                          dex::StringIndex string_idx,
-                                          ObjPtr<mirror::DexCache> dex_cache) {
+ObjPtr<mirror::String> ClassLinker::LookupString(dex::StringIndex string_idx,
+                                                 ObjPtr<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache != nullptr);
   ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != nullptr) {
-    return resolved.Ptr();
+    return resolved;
   }
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
   ObjPtr<mirror::String> string =
@@ -7756,87 +7759,168 @@
   if (string != nullptr) {
     dex_cache->SetResolvedString(string_idx, string);
   }
-  return string.Ptr();
+  return string;
 }
 
-ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(const DexFile& dex_file,
-                                                      dex::TypeIndex type_idx,
-                                                      ObjPtr<mirror::DexCache> dex_cache,
-                                                      ObjPtr<mirror::ClassLoader> class_loader) {
-  ObjPtr<mirror::Class> type = dex_cache->GetResolvedType(type_idx);
-  if (type == nullptr) {
-    const char* descriptor = dex_file.StringByTypeIdx(type_idx);
-    DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
-    if (descriptor[1] == '\0') {
-      // only the descriptors of primitive types should be 1 character long, also avoid class lookup
-      // for primitive classes that aren't backed by dex files.
-      type = FindPrimitiveClass(descriptor[0]);
+ObjPtr<mirror::Class> ClassLinker::DoLookupResolvedType(dex::TypeIndex type_idx,
+                                                        ObjPtr<mirror::DexCache> dex_cache,
+                                                        ObjPtr<mirror::ClassLoader> class_loader) {
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  const char* descriptor = dex_file.StringByTypeIdx(type_idx);
+  DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
+  ObjPtr<mirror::Class> type = nullptr;
+  if (descriptor[1] == '\0') {
+    // only the descriptors of primitive types should be 1 character long, also avoid class lookup
+    // for primitive classes that aren't backed by dex files.
+    type = FindPrimitiveClass(descriptor[0]);
+  } else {
+    Thread* const self = Thread::Current();
+    DCHECK(self != nullptr);
+    const size_t hash = ComputeModifiedUtf8Hash(descriptor);
+    // Find the class in the loaded classes table.
+    type = LookupClass(self, descriptor, hash, class_loader.Ptr());
+  }
+  if (type != nullptr) {
+    if (type->IsResolved()) {
+      dex_cache->SetResolvedType(type_idx, type);
     } else {
-      Thread* const self = Thread::Current();
-      DCHECK(self != nullptr);
-      const size_t hash = ComputeModifiedUtf8Hash(descriptor);
-      // Find the class in the loaded classes table.
-      type = LookupClass(self, descriptor, hash, class_loader.Ptr());
-    }
-    if (type != nullptr) {
-      if (type->IsResolved()) {
-        dex_cache->SetResolvedType(type_idx, type);
-      } else {
-        type = nullptr;
-      }
+      type = nullptr;
     }
   }
-  DCHECK(type == nullptr || type->IsResolved());
   return type;
 }
 
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        dex::TypeIndex type_idx,
-                                        ObjPtr<mirror::Class> referrer) {
-  StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
-  return ResolveType(dex_file, type_idx, dex_cache, class_loader);
-}
-
-mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
-                                        dex::TypeIndex type_idx,
-                                        Handle<mirror::DexCache> dex_cache,
-                                        Handle<mirror::ClassLoader> class_loader) {
-  DCHECK(dex_cache != nullptr);
-  Thread::PoisonObjectPointersIfDebug();
-  ObjPtr<mirror::Class> resolved = dex_cache->GetResolvedType(type_idx);
-  if (resolved == nullptr) {
-    Thread* self = Thread::Current();
-    const char* descriptor = dex_file.StringByTypeIdx(type_idx);
-    resolved = FindClass(self, descriptor, class_loader);
-    if (resolved != nullptr) {
-      // TODO: we used to throw here if resolved's class loader was not the
-      //       boot class loader. This was to permit different classes with the
-      //       same name to be loaded simultaneously by different loaders
-      dex_cache->SetResolvedType(type_idx, resolved);
-    } else {
-      CHECK(self->IsExceptionPending())
-          << "Expected pending exception for failed resolution of: " << descriptor;
-      // Convert a ClassNotFoundException to a NoClassDefFoundError.
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
-      if (cause->InstanceOf(GetClassRoot(kJavaLangClassNotFoundException))) {
-        DCHECK(resolved == nullptr);  // No Handle needed to preserve resolved.
-        self->ClearException();
-        ThrowNoClassDefFoundError("Failed resolution of: %s", descriptor);
-        self->GetException()->SetCause(cause.Get());
-      }
+ObjPtr<mirror::Class> ClassLinker::DoResolveType(dex::TypeIndex type_idx,
+                                                 Handle<mirror::DexCache> dex_cache,
+                                                 Handle<mirror::ClassLoader> class_loader) {
+  Thread* self = Thread::Current();
+  const char* descriptor = dex_cache->GetDexFile()->StringByTypeIdx(type_idx);
+  ObjPtr<mirror::Class> resolved = FindClass(self, descriptor, class_loader);
+  if (resolved != nullptr) {
+    // TODO: we used to throw here if resolved's class loader was not the
+    //       boot class loader. This was to permit different classes with the
+    //       same name to be loaded simultaneously by different loaders
+    dex_cache->SetResolvedType(type_idx, resolved);
+  } else {
+    CHECK(self->IsExceptionPending())
+        << "Expected pending exception for failed resolution of: " << descriptor;
+    // Convert a ClassNotFoundException to a NoClassDefFoundError.
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
+    if (cause->InstanceOf(GetClassRoot(kJavaLangClassNotFoundException))) {
+      DCHECK(resolved == nullptr);  // No Handle needed to preserve resolved.
+      self->ClearException();
+      ThrowNoClassDefFoundError("Failed resolution of: %s", descriptor);
+      self->GetException()->SetCause(cause.Get());
     }
   }
   DCHECK((resolved == nullptr) || resolved->IsResolved())
       << resolved->PrettyDescriptor() << " " << resolved->GetStatus();
-  return resolved.Ptr();
+  return resolved;
+}
+
+std::string DescribeSpace(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+  std::ostringstream oss;
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  gc::space::ContinuousSpace* cs = heap->FindContinuousSpaceFromAddress(klass.Ptr());
+  if (cs != nullptr) {
+    if (cs->IsImageSpace()) {
+      oss << "image;" << cs->GetName() << ";" << cs->AsImageSpace()->GetImageFilename();
+    } else {
+      oss << "continuous;" << cs->GetName();
+    }
+  } else {
+    gc::space::DiscontinuousSpace* ds =
+        heap->FindDiscontinuousSpaceFromObject(klass, /* fail_ok */ true);
+    if (ds != nullptr) {
+      oss << "discontinuous;" << ds->GetName();
+    } else {
+      oss << "invalid";
+    }
+  }
+  return oss.str();
+}
+
+std::string DescribeLoaders(ObjPtr<mirror::ClassLoader> loader, const char* class_descriptor)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  std::ostringstream oss;
+  uint32_t hash = ComputeModifiedUtf8Hash(class_descriptor);
+  ObjPtr<mirror::Class> path_class_loader =
+      WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_PathClassLoader);
+  ObjPtr<mirror::Class> dex_class_loader =
+      WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_DexClassLoader);
+  ObjPtr<mirror::Class> delegate_last_class_loader =
+      WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_DelegateLastClassLoader);
+
+  // Print the class loader chain.
+  bool found_class = false;
+  const char* loader_separator = "";
+  if (loader == nullptr) {
+    oss << "BootClassLoader";  // This would be unexpected.
+  }
+  for (; loader != nullptr; loader = loader->GetParent()) {
+    oss << loader_separator << loader->GetClass()->PrettyDescriptor();
+    loader_separator = ";";
+    // If we didn't find the class yet, try to find it in the current class loader.
+    if (!found_class) {
+      ClassTable* table = Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(loader);
+      ObjPtr<mirror::Class> klass =
+          (table != nullptr) ? table->Lookup(class_descriptor, hash) : nullptr;
+      if (klass != nullptr) {
+        found_class = true;
+        oss << "[hit:" << DescribeSpace(klass) << "]";
+      }
+    }
+
+    // For PathClassLoader, DexClassLoader or DelegateLastClassLoader
+    // also dump the dex file locations.
+    if (loader->GetClass() == path_class_loader ||
+        loader->GetClass() == dex_class_loader ||
+        loader->GetClass() == delegate_last_class_loader) {
+      ArtField* const cookie_field =
+          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
+      ArtField* const dex_file_field =
+          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      ObjPtr<mirror::Object> dex_path_list =
+          jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+              GetObject(loader);
+      if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
+        ObjPtr<mirror::Object> dex_elements_obj =
+            jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
+        if (dex_elements_obj != nullptr) {
+          ObjPtr<mirror::ObjectArray<mirror::Object>> dex_elements =
+              dex_elements_obj->AsObjectArray<mirror::Object>();
+          oss << "(";
+          const char* path_separator = "";
+          for (int32_t i = 0; i != dex_elements->GetLength(); ++i) {
+            ObjPtr<mirror::Object> element = dex_elements->GetWithoutChecks(i);
+            ObjPtr<mirror::Object> dex_file =
+                (element != nullptr) ? dex_file_field->GetObject(element) : nullptr;
+            ObjPtr<mirror::LongArray> long_array =
+                (dex_file != nullptr) ? cookie_field->GetObject(dex_file)->AsLongArray() : nullptr;
+            if (long_array != nullptr) {
+              int32_t long_array_size = long_array->GetLength();
+              // First element is the oat file; dex file pointers start at kDexFileIndexStart.
+              for (int32_t j = kDexFileIndexStart; j < long_array_size; ++j) {
+                const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(
+                    static_cast<uintptr_t>(long_array->GetWithoutChecks(j)));
+                oss << path_separator << cp_dex_file->GetLocation();
+                path_separator = ":";
+              }
+            }
+          }
+          oss << ")";
+        }
+      }
+    }
+  }
+
+  return oss.str();
 }
 
 template <ClassLinker::ResolveMode kResolveMode>
-ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
-                                      uint32_t method_idx,
+ArtMethod* ClassLinker::ResolveMethod(uint32_t method_idx,
                                       Handle<mirror::DexCache> dex_cache,
                                       Handle<mirror::ClassLoader> class_loader,
                                       ArtMethod* referrer,
@@ -7854,17 +7938,24 @@
     DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
     return resolved;
   }
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
   ObjPtr<mirror::Class> klass = nullptr;
   if (valid_dex_cache_method) {
     // We have a valid method from the DexCache but we need to perform ICCE and IAE checks.
     DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
-    klass = LookupResolvedType(dex_file, method_id.class_idx_, dex_cache.Get(), class_loader.Get());
-    CHECK(klass != nullptr) << resolved->PrettyMethod() << " " << resolved << " "
-                            << resolved->GetAccessFlags();
+    klass = LookupResolvedType(method_id.class_idx_, dex_cache.Get(), class_loader.Get());
+    if (UNLIKELY(klass == nullptr)) {
+      const char* descriptor = dex_file.StringByTypeIdx(method_id.class_idx_);
+      LOG(FATAL) << "Check failed: klass != nullptr Bug: 64759619 Method: "
+          << resolved->PrettyMethod() << ";" << resolved
+          << "/0x" << std::hex << resolved->GetAccessFlags()
+          << " ReferencedClass: " << descriptor
+          << " ClassLoader: " << DescribeLoaders(class_loader.Get(), descriptor);
+    }
   } else {
     // The method was not in the DexCache, resolve the declaring class.
-    klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
+    klass = ResolveType(method_id.class_idx_, dex_cache, class_loader);
     if (klass == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
       return nullptr;
@@ -7939,8 +8030,7 @@
   }
 }
 
-ArtMethod* ClassLinker::ResolveMethodWithoutInvokeType(const DexFile& dex_file,
-                                                       uint32_t method_idx,
+ArtMethod* ClassLinker::ResolveMethodWithoutInvokeType(uint32_t method_idx,
                                                        Handle<mirror::DexCache> dex_cache,
                                                        Handle<mirror::ClassLoader> class_loader) {
   ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx, image_pointer_size_);
@@ -7951,9 +8041,8 @@
     return resolved;
   }
   // Fail, get the declaring class.
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
-  ObjPtr<mirror::Class> klass =
-      ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(method_idx);
+  ObjPtr<mirror::Class> klass = ResolveType(method_id.class_idx_, dex_cache, class_loader);
   if (klass == nullptr) {
     Thread::Current()->AssertPendingException();
     return nullptr;
@@ -7975,7 +8064,7 @@
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
   ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(field_id.class_idx_);
   if (klass == nullptr) {
-    klass = LookupResolvedType(dex_file, field_id.class_idx_, dex_cache, class_loader);
+    klass = LookupResolvedType(field_id.class_idx_, dex_cache, class_loader);
   }
   if (klass == nullptr) {
     // The class has not been resolved yet, so the field is also unresolved.
@@ -8004,8 +8093,7 @@
   return resolved_field;
 }
 
-ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
-                                    uint32_t field_idx,
+ArtField* ClassLinker::ResolveField(uint32_t field_idx,
                                     Handle<mirror::DexCache> dex_cache,
                                     Handle<mirror::ClassLoader> class_loader,
                                     bool is_static) {
@@ -8015,9 +8103,10 @@
   if (resolved != nullptr) {
     return resolved;
   }
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
   Thread* const self = Thread::Current();
-  ObjPtr<mirror::Class> klass = ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader);
+  ObjPtr<mirror::Class> klass = ResolveType(field_id.class_idx_, dex_cache, class_loader);
   if (klass == nullptr) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return nullptr;
@@ -8046,8 +8135,7 @@
   return resolved;
 }
 
-ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
-                                       uint32_t field_idx,
+ArtField* ClassLinker::ResolveFieldJLS(uint32_t field_idx,
                                        Handle<mirror::DexCache> dex_cache,
                                        Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache != nullptr);
@@ -8056,9 +8144,10 @@
   if (resolved != nullptr) {
     return resolved;
   }
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
   Thread* self = Thread::Current();
-  ObjPtr<mirror::Class> klass(ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader));
+  ObjPtr<mirror::Class> klass = ResolveType(field_id.class_idx_, dex_cache, class_loader);
   if (klass == nullptr) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return nullptr;
@@ -8075,11 +8164,11 @@
   return resolved;
 }
 
-mirror::MethodType* ClassLinker::ResolveMethodType(Thread* self,
-                                                   const DexFile& dex_file,
-                                                   uint32_t proto_idx,
-                                                   Handle<mirror::DexCache> dex_cache,
-                                                   Handle<mirror::ClassLoader> class_loader) {
+ObjPtr<mirror::MethodType> ClassLinker::ResolveMethodType(
+    Thread* self,
+    uint32_t proto_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader) {
   DCHECK(Runtime::Current()->IsMethodHandlesEnabled());
   DCHECK(dex_cache != nullptr);
 
@@ -8091,9 +8180,10 @@
   StackHandleScope<4> hs(self);
 
   // First resolve the return type.
+  const DexFile& dex_file = *dex_cache->GetDexFile();
   const DexFile::ProtoId& proto_id = dex_file.GetProtoId(proto_idx);
   Handle<mirror::Class> return_type(hs.NewHandle(
-      ResolveType(dex_file, proto_id.return_type_idx_, dex_cache, class_loader)));
+      ResolveType(proto_id.return_type_idx_, dex_cache, class_loader)));
   if (return_type == nullptr) {
     DCHECK(self->IsExceptionPending());
     return nullptr;
@@ -8119,7 +8209,7 @@
   MutableHandle<mirror::Class> param_class = hs.NewHandle<mirror::Class>(nullptr);
   for (; it.HasNext(); it.Next()) {
     const dex::TypeIndex type_idx = it.GetTypeIdx();
-    param_class.Assign(ResolveType(dex_file, type_idx, dex_cache, class_loader));
+    param_class.Assign(ResolveType(type_idx, dex_cache, class_loader));
     if (param_class == nullptr) {
       DCHECK(self->IsExceptionPending());
       return nullptr;
@@ -8137,14 +8227,13 @@
   return type.Get();
 }
 
-mirror::MethodType* ClassLinker::ResolveMethodType(Thread* self,
-                                                   uint32_t proto_idx,
-                                                   ArtMethod* referrer) {
+ObjPtr<mirror::MethodType> ClassLinker::ResolveMethodType(Thread* self,
+                                                          uint32_t proto_idx,
+                                                          ArtMethod* referrer) {
   StackHandleScope<2> hs(self);
-  const DexFile* dex_file = referrer->GetDexFile();
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
-  return ResolveMethodType(self, *dex_file, proto_idx, dex_cache, class_loader);
+  return ResolveMethodType(self, proto_idx, dex_cache, class_loader);
 }
 
 mirror::MethodHandle* ClassLinker::ResolveMethodHandleForField(
@@ -8322,8 +8411,7 @@
       // the invocation type to determine if the method is private. We
       // then resolve again specifying the intended invocation type to
       // force the appropriate checks.
-      target_method = ResolveMethodWithoutInvokeType(*dex_file,
-                                                     method_handle.field_or_method_idx_,
+      target_method = ResolveMethodWithoutInvokeType(method_handle.field_or_method_idx_,
                                                      hs.NewHandle(referrer->GetDexCache()),
                                                      hs.NewHandle(referrer->GetClassLoader()));
       if (UNLIKELY(target_method == nullptr)) {
@@ -8406,7 +8494,7 @@
   DexFileParameterIterator it(*dex_file, target_method->GetPrototype());
   while (it.HasNext()) {
     const dex::TypeIndex type_idx = it.GetTypeIdx();
-    mirror::Class* klass = ResolveType(*dex_file, type_idx, dex_cache, class_loader);
+    ObjPtr<mirror::Class> klass = ResolveType(type_idx, dex_cache, class_loader);
     if (nullptr == klass) {
       DCHECK(self->IsExceptionPending());
       return nullptr;
@@ -8439,9 +8527,9 @@
   return mirror::MethodHandleImpl::Create(self, target, kind, method_type);
 }
 
-mirror::MethodHandle* ClassLinker::ResolveMethodHandle(Thread* self,
-                                                       uint32_t method_handle_idx,
-                                                       ArtMethod* referrer)
+ObjPtr<mirror::MethodHandle> ClassLinker::ResolveMethodHandle(Thread* self,
+                                                              uint32_t method_handle_idx,
+                                                              ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* const dex_file = referrer->GetDexFile();
   const DexFile::MethodHandleItem& method_handle = dex_file->GetMethodHandle(method_handle_idx);
@@ -8655,10 +8743,10 @@
   DCHECK_EQ(h_dex_element_class.Get(), element_file_field->GetDeclaringClass());
 
   ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
-  DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->LookupType());
+  DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->LookupResolvedType());
 
   ArtField* file_name_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
-  DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->LookupType());
+  DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->LookupResolvedType());
 
   // Fill the elements array.
   int32_t index = 0;
@@ -8924,14 +9012,12 @@
 
 // Instantiate ResolveMethod.
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
-    const DexFile& dex_file,
     uint32_t method_idx,
     Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader,
     ArtMethod* referrer,
     InvokeType type);
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-    const DexFile& dex_file,
     uint32_t method_idx,
     Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader,
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index a4c4f3d..10562f0 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -243,58 +243,61 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Resolve a String with the given index from the DexFile, storing the
-  // result in the DexCache.
-  mirror::String* ResolveString(const DexFile& dex_file,
-                                dex::StringIndex string_idx,
-                                Handle<mirror::DexCache> dex_cache)
+  // Resolve a String with the given index from the DexFile associated with the given DexCache,
+  // storing the result in the DexCache.
+  ObjPtr<mirror::String> ResolveString(dex::StringIndex string_idx,
+                                       Handle<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Find a String with the given index from the DexFile, storing the
-  // result in the DexCache if found. Return null if not found.
-  mirror::String* LookupString(const DexFile& dex_file,
-                               dex::StringIndex string_idx,
-                               ObjPtr<mirror::DexCache> dex_cache)
+  // Find a String with the given index from the DexFile associated with the given DexCache,
+  // storing the result in the DexCache if found. Return null if not found.
+  ObjPtr<mirror::String> LookupString(dex::StringIndex string_idx,
+                                      ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Resolve a Type with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identify the
-  // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(const DexFile& dex_file,
-                             dex::TypeIndex type_idx,
-                             ObjPtr<mirror::Class> referrer)
+  // Resolve a type with the given index from the DexFile associated with the given `referrer`,
+  // storing the result in the DexCache. The `referrer` is used to identify the target DexCache
+  // and ClassLoader to use for resolution.
+  ObjPtr<mirror::Class> ResolveType(dex::TypeIndex type_idx, ObjPtr<mirror::Class> referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  // Resolve a Type with the given index from the DexFile, storing the
-  // result in the DexCache. The referrer is used to identify the
-  // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer)
+  // Resolve a type with the given index from the DexFile associated with the given `referrer`,
+  // storing the result in the DexCache. The `referrer` is used to identify the target DexCache
+  // and ClassLoader to use for resolution.
+  ObjPtr<mirror::Class> ResolveType(dex::TypeIndex type_idx, ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  // Look up a resolved type with the given ID from the DexFile. The ClassLoader is used to search
-  // for the type, since it may be referenced from but not contained within the given DexFile.
-  ObjPtr<mirror::Class> LookupResolvedType(const DexFile& dex_file,
-                                           dex::TypeIndex type_idx,
+  // Resolve a type with the given index from the DexFile associated with the given DexCache
+  // and ClassLoader, storing the result in DexCache. The ClassLoader is used to search for
+  // the type, since it may be referenced from but not contained within the DexFile.
+  ObjPtr<mirror::Class> ResolveType(dex::TypeIndex type_idx,
+                                    Handle<mirror::DexCache> dex_cache,
+                                    Handle<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
+
+  // Look up a resolved type with the given index from the DexFile associated with the given
+  // `referrer`, storing the result in the DexCache. The `referrer` is used to identify the
+  // target DexCache and ClassLoader to use for lookup.
+  ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_idx,
+                                           ObjPtr<mirror::Class> referrer)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Look up a resolved type with the given index from the DexFile associated with the given
+  // `referrer`, storing the result in the DexCache. The `referrer` is used to identify the
+  // target DexCache and ClassLoader to use for lookup.
+  ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_idx, ArtMethod* referrer)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Look up a resolved type with the given index from the DexFile associated with the given
+  // DexCache and ClassLoader. The ClassLoader is used to search for the type, since it may
+  // be referenced from but not contained within the DexFile.
+  ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_idx,
                                            ObjPtr<mirror::DexCache> dex_cache,
                                            ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  static ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_idx,
-                                                  ObjPtr<mirror::DexCache> dex_cache,
-                                                  ObjPtr<mirror::ClassLoader> class_loader)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Resolve a type with the given ID from the DexFile, storing the
-  // result in DexCache. The ClassLoader is used to search for the
-  // type, since it may be referenced from but not contained within
-  // the given DexFile.
-  mirror::Class* ResolveType(const DexFile& dex_file,
-                             dex::TypeIndex type_idx,
-                             Handle<mirror::DexCache> dex_cache,
-                             Handle<mirror::ClassLoader> class_loader)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
   // Determine whether a dex cache result should be trusted, or an IncompatibleClassChangeError
   // check and IllegalAccessError check should be performed even after a hit.
@@ -309,14 +312,12 @@
                                   ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Resolve a method with a given ID from the DexFile, storing the
-  // result in DexCache. The ClassLinker and ClassLoader are used as
-  // in ResolveType. What is unique is the method type argument which
-  // is used to determine if this method is a direct, static, or
-  // virtual method.
+  // Resolve a method with a given ID from the DexFile associated with the given DexCache
+  // and ClassLoader, storing the result in DexCache. The ClassLinker and ClassLoader are
+  // used as in ResolveType. What is unique is the method type argument which is used to
+  // determine if this method is a direct, static, or virtual method.
   template <ResolveMode kResolveMode>
-  ArtMethod* ResolveMethod(const DexFile& dex_file,
-                           uint32_t method_idx,
+  ArtMethod* ResolveMethod(uint32_t method_idx,
                            Handle<mirror::DexCache> dex_cache,
                            Handle<mirror::ClassLoader> class_loader,
                            ArtMethod* referrer,
@@ -332,8 +333,7 @@
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
-  ArtMethod* ResolveMethodWithoutInvokeType(const DexFile& dex_file,
-                                            uint32_t method_idx,
+  ArtMethod* ResolveMethodWithoutInvokeType(uint32_t method_idx,
                                             Handle<mirror::DexCache> dex_cache,
                                             Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -345,47 +345,47 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  // Resolve a field with a given ID from the DexFile, storing the
-  // result in DexCache. The ClassLinker and ClassLoader are used as
-  // in ResolveType. What is unique is the is_static argument which is
-  // used to determine if we are resolving a static or non-static
-  // field.
-  ArtField* ResolveField(const DexFile& dex_file, uint32_t field_idx,
+  // Resolve a field with a given ID from the DexFile associated with the given DexCache
+  // and ClassLoader, storing the result in DexCache. The ClassLinker and ClassLoader
+  // are used as in ResolveType. What is unique is the is_static argument which is used
+  // to determine if we are resolving a static or non-static field.
+  ArtField* ResolveField(uint32_t field_idx,
                          Handle<mirror::DexCache> dex_cache,
-                         Handle<mirror::ClassLoader> class_loader, bool is_static)
+                         Handle<mirror::ClassLoader> class_loader,
+                         bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  // Resolve a field with a given ID from the DexFile, storing the
-  // result in DexCache. The ClassLinker and ClassLoader are used as
-  // in ResolveType. No is_static argument is provided so that Java
+  // Resolve a field with a given ID from the DexFile associated with the given DexCache
+  // and ClassLoader, storing the result in DexCache. The ClassLinker and ClassLoader
+  // are used as in ResolveType. No is_static argument is provided so that Java
   // field resolution semantics are followed.
-  ArtField* ResolveFieldJLS(const DexFile& dex_file,
-                            uint32_t field_idx,
+  ArtField* ResolveFieldJLS(uint32_t field_idx,
                             Handle<mirror::DexCache> dex_cache,
                             Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  // Resolve a method type with a given ID from the DexFile, storing
-  // the result in the DexCache.
-  mirror::MethodType* ResolveMethodType(Thread* self,
-                                        const DexFile& dex_file,
-                                        uint32_t proto_idx,
-                                        Handle<mirror::DexCache> dex_cache,
-                                        Handle<mirror::ClassLoader> class_loader)
+  // Resolve a method type with a given ID from the DexFile associated with a given DexCache
+  // and ClassLoader, storing the result in the DexCache.
+  ObjPtr<mirror::MethodType> ResolveMethodType(Thread* self,
+                                               uint32_t proto_idx,
+                                               Handle<mirror::DexCache> dex_cache,
+                                               Handle<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  mirror::MethodType* ResolveMethodType(Thread* self, uint32_t proto_idx, ArtMethod* referrer)
+  ObjPtr<mirror::MethodType> ResolveMethodType(Thread* self,
+                                               uint32_t proto_idx,
+                                               ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a method handle with a given ID from the DexFile. The
   // result is not cached in the DexCache as the instance will only be
   // used once in most circumstances.
-  mirror::MethodHandle* ResolveMethodHandle(Thread* self,
-                                            uint32_t method_handle_idx,
-                                            ArtMethod* referrer)
+  ObjPtr<mirror::MethodHandle> ResolveMethodHandle(Thread* self,
+                                                   uint32_t method_handle_idx,
+                                                   ArtMethod* referrer)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns true on success, false if there's an exception pending.
@@ -881,6 +881,19 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_);
 
+  // Implementation of LookupResolvedType() called when the type was not found in the dex cache.
+  ObjPtr<mirror::Class> DoLookupResolvedType(dex::TypeIndex type_idx,
+                                             ObjPtr<mirror::DexCache> dex_cache,
+                                             ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Implementation of ResolveType() called when the type was not found in the dex cache.
+  ObjPtr<mirror::Class> DoResolveType(dex::TypeIndex type_idx,
+                                      Handle<mirror::DexCache> dex_cache,
+                                      Handle<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
+
   // Finds a class by its descriptor, returning NULL if it isn't wasn't loaded
   // by the given 'class_loader'. Uses the provided hash for the descriptor.
   mirror::Class* LookupClass(Thread* self,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 892a850..246f89e 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -954,15 +954,14 @@
   ObjPtr<mirror::Class> klass = class_linker_->FindClass(soa.Self(), "LMyClass;", class_loader);
   dex::TypeIndex type_idx = klass->GetClassDef()->class_idx_;
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
-  const DexFile& dex_file = klass->GetDexFile();
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache, class_loader.Get()),
       klass);
   // Zero out the resolved type and make sure LookupResolvedType still finds it.
   dex_cache->ClearResolvedType(type_idx);
   EXPECT_TRUE(dex_cache->GetResolvedType(type_idx) == nullptr);
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache, class_loader.Get()),
       klass);
 }
 
@@ -983,7 +982,7 @@
   dex::TypeIndex array_idx = dex_file.GetIndexForTypeId(*array_id);
   // Check that the array class wasn't resolved yet.
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(array_idx, dex_cache.Get(), class_loader.Get()),
       ObjPtr<mirror::Class>(nullptr));
   // Resolve the array class we want to test.
   ObjPtr<mirror::Class> array_klass
@@ -991,13 +990,13 @@
   ASSERT_OBJ_PTR_NE(array_klass, ObjPtr<mirror::Class>(nullptr));
   // Test that LookupResolvedType() finds the array class.
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(array_idx, dex_cache.Get(), class_loader.Get()),
       array_klass);
   // Zero out the resolved type and make sure LookupResolvedType() still finds it.
   dex_cache->ClearResolvedType(array_idx);
   EXPECT_TRUE(dex_cache->GetResolvedType(array_idx) == nullptr);
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, array_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(array_idx, dex_cache.Get(), class_loader.Get()),
       array_klass);
 }
 
@@ -1012,15 +1011,14 @@
   ASSERT_OBJ_PTR_NE(klass.Get(), ObjPtr<mirror::Class>(nullptr));
   dex::TypeIndex type_idx = klass->GetClassDef()->class_idx_;
   Handle<mirror::DexCache> dex_cache = hs.NewHandle(klass->GetDexCache());
-  const DexFile& dex_file = klass->GetDexFile();
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache.Get(), class_loader.Get()),
       klass.Get());
   // Zero out the resolved type and make sure LookupResolvedType still finds it.
   dex_cache->ClearResolvedType(type_idx);
   EXPECT_TRUE(dex_cache->GetResolvedType(type_idx) == nullptr);
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache.Get(), class_loader.Get()),
       klass.Get());
   // Force initialization to turn the class erroneous.
   bool initialized = class_linker_->EnsureInitialized(soa.Self(),
@@ -1032,13 +1030,13 @@
   soa.Self()->ClearException();
   // Check that the LookupResolvedType() can still find the resolved type.
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache.Get(), class_loader.Get()),
       klass.Get());
   // Zero out the resolved type and make sure LookupResolvedType() still finds it.
   dex_cache->ClearResolvedType(type_idx);
   EXPECT_TRUE(dex_cache->GetResolvedType(type_idx) == nullptr);
   EXPECT_OBJ_PTR_EQ(
-      class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache.Get(), class_loader.Get()),
+      class_linker_->LookupResolvedType(type_idx, dex_cache.Get(), class_loader.Get()),
       klass.Get());
 }
 
@@ -1304,10 +1302,18 @@
   const DexFile::TypeId* type_id = dex_file->FindTypeId("LStaticsFromCode;");
   ASSERT_TRUE(type_id != nullptr);
   dex::TypeIndex type_idx = dex_file->GetIndexForTypeId(*type_id);
-  mirror::Class* uninit = ResolveVerifyAndClinit(type_idx, clinit, soa.Self(), true, false);
+  ObjPtr<mirror::Class> uninit = ResolveVerifyAndClinit(type_idx,
+                                                        clinit,
+                                                        soa.Self(),
+                                                        /* can_run_clinit */ true,
+                                                        /* verify_access */ false);
   EXPECT_TRUE(uninit != nullptr);
   EXPECT_FALSE(uninit->IsInitialized());
-  mirror::Class* init = ResolveVerifyAndClinit(type_idx, getS0, soa.Self(), true, false);
+  ObjPtr<mirror::Class> init = ResolveVerifyAndClinit(type_idx,
+                                                      getS0,
+                                                      soa.Self(),
+                                                      /* can_run_clinit */ true,
+                                                      /* verify_access */ false);
   EXPECT_TRUE(init != nullptr);
   EXPECT_TRUE(init->IsInitialized());
 }
@@ -1541,11 +1547,7 @@
   // Its RType = Ljava/lang/String;
   // Its PTypes = { Ljava/lang/String; }
   Handle<mirror::MethodType> method1_type = hs.NewHandle(
-      class_linker_->ResolveMethodType(soa.Self(),
-                                       dex_file,
-                                       method1_id.proto_idx_,
-                                       dex_cache,
-                                       class_loader));
+      class_linker_->ResolveMethodType(soa.Self(), method1_id.proto_idx_, dex_cache, class_loader));
 
   // Assert that the method type was resolved successfully.
   ASSERT_TRUE(method1_type != nullptr);
@@ -1559,11 +1561,7 @@
 
   // Resolve the method type again and assert that we get back the same value.
   Handle<mirror::MethodType> method1_type2 = hs.NewHandle(
-      class_linker_->ResolveMethodType(soa.Self(),
-                                       dex_file,
-                                       method1_id.proto_idx_,
-                                       dex_cache,
-                                       class_loader));
+      class_linker_->ResolveMethodType(soa.Self(), method1_id.proto_idx_, dex_cache, class_loader));
   ASSERT_EQ(method1_type.Get(), method1_type2.Get());
 
   // Resolve the MethodType associated with a different method signature
@@ -1576,11 +1574,7 @@
   ASSERT_FALSE(method2->IsDirect());
   const DexFile::MethodId& method2_id = dex_file.GetMethodId(method2->GetDexMethodIndex());
   Handle<mirror::MethodType> method2_type = hs.NewHandle(
-      class_linker_->ResolveMethodType(soa.Self(),
-                                       dex_file,
-                                       method2_id.proto_idx_,
-                                       dex_cache,
-                                       class_loader));
+      class_linker_->ResolveMethodType(soa.Self(), method2_id.proto_idx_, dex_cache, class_loader));
   ASSERT_TRUE(method1_type.Get() != method2_type.Get());
 }
 
diff --git a/runtime/code_item_accessors-inl.h b/runtime/code_item_accessors-inl.h
index d04849d..4f4d8cc 100644
--- a/runtime/code_item_accessors-inl.h
+++ b/runtime/code_item_accessors-inl.h
@@ -22,6 +22,7 @@
 #include "art_method-inl.h"
 #include "cdex/compact_dex_file.h"
 #include "dex_file-inl.h"
+#include "oat_file.h"
 #include "standard_dex_file.h"
 
 namespace art {
@@ -37,7 +38,7 @@
 }
 
 inline void CodeItemInstructionAccessor::Init(const DexFile* dex_file,
-                                               const DexFile::CodeItem* code_item) {
+                                              const DexFile::CodeItem* code_item) {
   DCHECK(dex_file != nullptr);
   DCHECK(code_item != nullptr);
   if (dex_file->IsCompactDexFile()) {
@@ -150,6 +151,31 @@
   return index != -1 ? &try_items.begin()[index] : nullptr;
 }
 
+inline CodeItemDebugInfoAccessor::CodeItemDebugInfoAccessor(ArtMethod* method)
+    : CodeItemDebugInfoAccessor(method->GetDexFile(), method->GetCodeItem()) {}
+
+inline CodeItemDebugInfoAccessor::CodeItemDebugInfoAccessor(const DexFile* dex_file,
+                                                            const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {  // Only code_item may be null; dex_file is dereferenced below.
+    return;  // debug_info_offset_ keeps its 0u default and Init() is never called.
+  }
+  debug_info_offset_ = OatFile::GetDebugInfoOffset(*dex_file, code_item);
+  if (dex_file->IsCompactDexFile()) {
+    Init(down_cast<const CompactDexFile::CodeItem&>(*code_item));
+  } else {  // Anything that is not compact dex is expected to be standard dex.
+    DCHECK(dex_file->IsStandardDexFile());
+    Init(down_cast<const StandardDexFile::CodeItem&>(*code_item));
+  }
+}
+
+inline void CodeItemDebugInfoAccessor::Init(const CompactDexFile::CodeItem& code_item) {
+  CodeItemDataAccessor::Init(code_item);  // Forward to the base class; no extra state here.
+}
+
+inline void CodeItemDebugInfoAccessor::Init(const StandardDexFile::CodeItem& code_item) {
+  CodeItemDataAccessor::Init(code_item);  // Forward to the base class; no extra state here.
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CODE_ITEM_ACCESSORS_INL_H_
diff --git a/runtime/code_item_accessors.h b/runtime/code_item_accessors.h
index aa1305a..a089a27 100644
--- a/runtime/code_item_accessors.h
+++ b/runtime/code_item_accessors.h
@@ -132,6 +132,30 @@
   uint16_t tries_size_;
 };
 
+// Abstracts access to code item data, including the debug info offset. Heavier than the other
+// helpers.
+class CodeItemDebugInfoAccessor : public CodeItemDataAccessor {
+ public:
+  CodeItemDebugInfoAccessor() = default;
+
+  // Handles null code items, but not null dex files.
+  ALWAYS_INLINE CodeItemDebugInfoAccessor(const DexFile* dex_file,
+                                          const DexFile::CodeItem* code_item);
+
+  ALWAYS_INLINE explicit CodeItemDebugInfoAccessor(ArtMethod* method);
+
+  uint32_t DebugInfoOffset() const {
+    return debug_info_offset_;  // 0u if the accessor was default-constructed.
+  }
+
+ protected:
+  ALWAYS_INLINE void Init(const CompactDexFile::CodeItem& code_item);
+  ALWAYS_INLINE void Init(const StandardDexFile::CodeItem& code_item);
+
+ private:
+  uint32_t debug_info_offset_ = 0u;  // Stays 0u unless a constructor computes a real offset.
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_CODE_ITEM_ACCESSORS_H_
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index ef1647c..6db4d92 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -30,6 +30,7 @@
 #include "base/file_utils.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/runtime_debug.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 5be8d5b..0f931e3 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -22,6 +22,8 @@
 
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
 #include "base/mutex.h"
 #include "globals.h"
@@ -32,6 +34,9 @@
 
 namespace art {
 
+using LogSeverity = android::base::LogSeverity;
+using ScopedLogSeverity = android::base::ScopedLogSeverity;
+
 // OBJ pointer helpers to avoid needing .Decode everywhere.
 #define EXPECT_OBJ_PTR_EQ(a, b) EXPECT_EQ(MakeObjPtr(a).Ptr(), MakeObjPtr(b).Ptr());
 #define ASSERT_OBJ_PTR_EQ(a, b) ASSERT_EQ(MakeObjPtr(a).Ptr(), MakeObjPtr(b).Ptr());
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index cd52bb6..575d18e 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -18,11 +18,11 @@
 
 #include <sstream>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
@@ -441,7 +441,7 @@
   return addr == monitor_offset;
 }
 
-static bool IsValidImplicitCheck(uintptr_t addr, ArtMethod* method, const Instruction& instr)
+static bool IsValidImplicitCheck(uintptr_t addr, const Instruction& instr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (!CanDoImplicitNullCheckOn(addr)) {
     return false;
@@ -483,9 +483,10 @@
     case Instruction::IPUT_BYTE:
     case Instruction::IPUT_CHAR:
     case Instruction::IPUT_SHORT: {
-      ArtField* field =
-          Runtime::Current()->GetClassLinker()->ResolveField(instr.VRegC_22c(), method, false);
-      return (addr == 0) || (addr == field->GetOffset().Uint32Value());
+      // We might be doing an implicit null check with an offset that doesn't correspond
+      // to the instruction, for example with two field accesses and the first one being
+      // eliminated or re-ordered.
+      return true;
     }
 
     case Instruction::IGET_OBJECT_QUICK:
@@ -506,7 +507,10 @@
     case Instruction::IPUT_SHORT_QUICK:
     case Instruction::IPUT_WIDE_QUICK:
     case Instruction::IPUT_OBJECT_QUICK: {
-      return (addr == 0u) || (addr == instr.VRegC_22c());
+      // We might be doing an implicit null check with an offset that doesn't correspond
+      // to the instruction, for example with two field accesses and the first one being
+      // eliminated or re-ordered.
+      return true;
     }
 
     case Instruction::AGET_OBJECT:
@@ -550,7 +554,7 @@
   const DexFile::CodeItem* code = method->GetCodeItem();
   CHECK_LT(throw_dex_pc, code->insns_size_in_code_units_);
   const Instruction* instr = Instruction::At(&code->insns_[throw_dex_pc]);
-  if (check_address && !IsValidImplicitCheck(addr, method, *instr)) {
+  if (check_address && !IsValidImplicitCheck(addr, *instr)) {
     const DexFile* dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
     LOG(FATAL) << "Invalid address for an implicit NullPointerException check: "
                << "0x" << std::hex << addr << std::dec
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 1dcd935..b5ae09f 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -963,7 +963,11 @@
   }
   VariableSizedHandleScope hs(Thread::Current());
   std::vector<Handle<mirror::Object>> raw_instances;
-  Runtime::Current()->GetHeap()->GetInstances(hs, hs.NewHandle(c), max_count, raw_instances);
+  Runtime::Current()->GetHeap()->GetInstances(hs,
+                                              hs.NewHandle(c),
+                                              /* use_is_assignable_from */ false,
+                                              max_count,
+                                              raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
     instances->push_back(gRegistry->Add(raw_instances[i].Get()));
   }
@@ -1657,16 +1661,16 @@
     }
   };
   ArtMethod* m = FromMethodId(method_id);
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
+  CodeItemDebugInfoAccessor accessor(m);
   uint64_t start, end;
-  if (code_item == nullptr) {
+  if (!accessor.HasCodeItem()) {
     DCHECK(m->IsNative() || m->IsProxyMethod());
     start = -1;
     end = -1;
   } else {
     start = 0;
     // Return the index of the last instruction
-    end = code_item->insns_size_in_code_units_ - 1;
+    end = accessor.InsnsSizeInCodeUnits() - 1;
   }
 
   expandBufAdd8BE(pReply, start);
@@ -1680,10 +1684,10 @@
   context.numItems = 0;
   context.pReply = pReply;
 
-  if (code_item != nullptr) {
-    uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*(m->GetDexFile()), code_item);
-    m->GetDexFile()->DecodeDebugPositionInfo(
-        code_item, debug_info_offset, DebugCallbackContext::Callback, &context);
+  if (accessor.HasCodeItem()) {
+    m->GetDexFile()->DecodeDebugPositionInfo(accessor.DebugInfoOffset(),
+                                             DebugCallbackContext::Callback,
+                                             &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + numLinesOffset, context.numItems);
@@ -1723,6 +1727,7 @@
     }
   };
   ArtMethod* m = FromMethodId(method_id);
+  CodeItemDebugInfoAccessor accessor(m);
 
   // arg_count considers doubles and longs to take 2 units.
   // variable_count considers everything to take 1 unit.
@@ -1738,12 +1743,15 @@
   context.variable_count = 0;
   context.with_generic = with_generic;
 
-  const DexFile::CodeItem* code_item = m->GetCodeItem();
-  if (code_item != nullptr) {
-    uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*(m->GetDexFile()), code_item);
-    m->GetDexFile()->DecodeDebugLocalInfo(
-        code_item, debug_info_offset, m->IsStatic(), m->GetDexMethodIndex(),
-        DebugCallbackContext::Callback, &context);
+  if (accessor.HasCodeItem()) {
+    m->GetDexFile()->DecodeDebugLocalInfo(accessor.RegistersSize(),
+                                          accessor.InsSize(),
+                                          accessor.InsnsSizeInCodeUnits(),
+                                          accessor.DebugInfoOffset(),
+                                          m->IsStatic(),
+                                          m->GetDexMethodIndex(),
+                                          DebugCallbackContext::Callback,
+                                          &context);
   }
 
   JDWP::Set4BE(expandBufGetBuffer(pReply) + variable_count_offset, context.variable_count);
@@ -3832,9 +3840,9 @@
   // Find the dex_pc values that correspond to the current line, for line-based single-stepping.
   struct DebugCallbackContext {
     DebugCallbackContext(SingleStepControl* single_step_control_cb,
-                         int32_t line_number_cb, const DexFile::CodeItem* code_item)
+                         int32_t line_number_cb, uint32_t num_insns_in_code_units)
         : single_step_control_(single_step_control_cb), line_number_(line_number_cb),
-          code_item_(code_item), last_pc_valid(false), last_pc(0) {
+          num_insns_in_code_units_(num_insns_in_code_units), last_pc_valid(false), last_pc(0) {
     }
 
     static bool Callback(void* raw_context, const DexFile::PositionInfo& entry) {
@@ -3860,8 +3868,7 @@
     ~DebugCallbackContext() {
       // If the line number was the last in the position table...
       if (last_pc_valid) {
-        size_t end = code_item_->insns_size_in_code_units_;
-        for (uint32_t dex_pc = last_pc; dex_pc < end; ++dex_pc) {
+        for (uint32_t dex_pc = last_pc; dex_pc < num_insns_in_code_units_; ++dex_pc) {
           single_step_control_->AddDexPc(dex_pc);
         }
       }
@@ -3869,7 +3876,7 @@
 
     SingleStepControl* const single_step_control_;
     const int32_t line_number_;
-    const DexFile::CodeItem* const code_item_;
+    const uint32_t num_insns_in_code_units_;
     bool last_pc_valid;
     uint32_t last_pc;
   };
@@ -3888,11 +3895,11 @@
   // Note: if the thread is not running Java code (pure native thread), there is no "current"
   // method on the stack (and no line number either).
   if (m != nullptr && !m->IsNative()) {
-    const DexFile::CodeItem* const code_item = m->GetCodeItem();
-    DebugCallbackContext context(single_step_control, line_number, code_item);
-    uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*(m->GetDexFile()), code_item);
-    m->GetDexFile()->DecodeDebugPositionInfo(
-        code_item, debug_info_offset, DebugCallbackContext::Callback, &context);
+    CodeItemDebugInfoAccessor accessor(m);
+    DebugCallbackContext context(single_step_control, line_number, accessor.InsnsSizeInCodeUnits());
+    m->GetDexFile()->DecodeDebugPositionInfo(accessor.DebugInfoOffset(),
+                                             DebugCallbackContext::Callback,
+                                             &context);
   }
 
   // Activate single-step in the thread.
@@ -4372,15 +4379,20 @@
                              replyData.get(),
                              offset,
                              length);
-  if (length == 0 || replyData.get() == nullptr) {
-    return false;
-  }
-
   out_data->resize(length);
   env->GetByteArrayRegion(replyData.get(),
                           offset,
                           length,
                           reinterpret_cast<jbyte*>(out_data->data()));
+
+  if (env->ExceptionCheck()) {
+    LOG(INFO) << StringPrintf("Exception thrown when reading response data from dispatcher 0x%08x",
+                              type);
+    env->ExceptionDescribe();
+    env->ExceptionClear();
+    return false;
+  }
+
   return true;
 }
 
@@ -4414,7 +4426,7 @@
   std::vector<uint8_t> out_data;
   uint32_t out_type = 0;
   request->Skip(request_length);
-  if (!DdmHandleChunk(env, type, data, &out_type, &out_data)) {
+  if (!DdmHandleChunk(env, type, data, &out_type, &out_data) || out_data.empty()) {
     return false;
   }
   const uint32_t kDdmHeaderSize = 8;
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 1880968..a6f7621 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -19,7 +19,6 @@
 
 #include "base/bit_utils.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/stringpiece.h"
 #include "cdex/compact_dex_file.h"
 #include "dex_file.h"
@@ -385,13 +384,16 @@
 }
 
 template<typename NewLocalCallback>
-bool DexFile::DecodeDebugLocalInfo(const CodeItem* code_item,
+bool DexFile::DecodeDebugLocalInfo(uint32_t registers_size,
+                                   uint32_t ins_size,
+                                   uint32_t insns_size_in_code_units,
                                    uint32_t debug_info_offset,
                                    bool is_static,
                                    uint32_t method_idx,
                                    NewLocalCallback new_local_callback,
                                    void* context) const {
-  if (code_item == nullptr) {
+  const uint8_t* const stream = GetDebugInfoStream(debug_info_offset);
+  if (stream == nullptr) {
     return false;
   }
   std::vector<const char*> arg_descriptors;
@@ -399,15 +401,15 @@
   for (; it.HasNext(); it.Next()) {
     arg_descriptors.push_back(it.GetDescriptor());
   }
-  return DecodeDebugLocalInfo(GetDebugInfoStream(debug_info_offset),
+  return DecodeDebugLocalInfo(stream,
                               GetLocation(),
                               GetMethodDeclaringClassDescriptor(GetMethodId(method_idx)),
                               arg_descriptors,
                               this->PrettyMethod(method_idx),
                               is_static,
-                              code_item->registers_size_,
-                              code_item->ins_size_,
-                              code_item->insns_size_in_code_units_,
+                              registers_size,
+                              ins_size,
+                              insns_size_in_code_units,
                               [this](uint32_t idx) {
                                 return StringDataByIdx(dex::StringIndex(idx));
                               },
@@ -488,13 +490,9 @@
 }
 
 template<typename DexDebugNewPosition>
-bool DexFile::DecodeDebugPositionInfo(const CodeItem* code_item,
-                                      uint32_t debug_info_offset,
+bool DexFile::DecodeDebugPositionInfo(uint32_t debug_info_offset,
                                       DexDebugNewPosition position_functor,
                                       void* context) const {
-  if (code_item == nullptr) {
-    return false;
-  }
   return DecodeDebugPositionInfo(GetDebugInfoStream(debug_info_offset),
                                  [this](uint32_t idx) {
                                    return StringDataByIdx(dex::StringIndex(idx));
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index af79207..2d02415 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,7 +29,6 @@
 #include "android-base/stringprintf.h"
 
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 #include "dex_file-inl.h"
 #include "leb128.h"
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 944a308..de3af8a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -21,8 +21,10 @@
 #include <string>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/iteration_range.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "base/value_object.h"
 #include "dex_file_types.h"
 #include "dex_instruction_iterator.h"
@@ -954,7 +956,9 @@
                                    NewLocalCallback new_local,
                                    void* context);
   template<typename NewLocalCallback>
-  bool DecodeDebugLocalInfo(const CodeItem* code_item,
+  bool DecodeDebugLocalInfo(uint32_t registers_size,
+                            uint32_t ins_size,
+                            uint32_t insns_size_in_code_units,
                             uint32_t debug_info_offset,
                             bool is_static,
                             uint32_t method_idx,
@@ -968,8 +972,7 @@
                                       DexDebugNewPosition position_functor,
                                       void* context);
   template<typename DexDebugNewPosition>
-  bool DecodeDebugPositionInfo(const CodeItem* code_item,
-                               uint32_t debug_info_offset,
+  bool DecodeDebugPositionInfo(uint32_t debug_info_offset,
                                DexDebugNewPosition position_functor,
                                void* context) const;
 
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index b44bd51..72b18fb 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -343,8 +343,7 @@
   StackHandleScope<4> hs(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Handle<mirror::Class> annotation_class(hs.NewHandle(
-      class_linker->ResolveType(klass.GetDexFile(),
-                                dex::TypeIndex(type_index),
+      class_linker->ResolveType(dex::TypeIndex(type_index),
                                 hs.NewHandle(klass.GetDexCache()),
                                 hs.NewHandle(klass.GetClassLoader()))));
   if (annotation_class == nullptr) {
@@ -458,7 +457,7 @@
       } else {
         StackHandleScope<1> hs(self);
         element_object = Runtime::Current()->GetClassLinker()->ResolveString(
-            klass.GetDexFile(), dex::StringIndex(index), hs.NewHandle(klass.GetDexCache()));
+            dex::StringIndex(index), hs.NewHandle(klass.GetDexCache()));
         set_object = true;
         if (element_object == nullptr) {
           return false;
@@ -474,7 +473,6 @@
         dex::TypeIndex type_index(index);
         StackHandleScope<2> hs(self);
         element_object = Runtime::Current()->GetClassLinker()->ResolveType(
-            klass.GetDexFile(),
             type_index,
             hs.NewHandle(klass.GetDexCache()),
             hs.NewHandle(klass.GetClassLoader()));
@@ -501,7 +499,6 @@
         ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
         StackHandleScope<2> hs(self);
         ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
-            klass.GetDexFile(),
             index,
             hs.NewHandle(klass.GetDexCache()),
             hs.NewHandle(klass.GetClassLoader()));
@@ -540,7 +537,6 @@
       } else {
         StackHandleScope<2> hs(self);
         ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(
-            klass.GetDexFile(),
             index,
             hs.NewHandle(klass.GetDexCache()),
             hs.NewHandle(klass.GetClassLoader()));
@@ -569,7 +565,6 @@
       } else {
         StackHandleScope<3> hs(self);
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
-            klass.GetDexFile(),
             index,
             hs.NewHandle(klass.GetDexCache()),
             hs.NewHandle(klass.GetClassLoader()),
@@ -783,10 +778,8 @@
     uint32_t type_index = DecodeUnsignedLeb128(&annotation);
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     Thread* self = Thread::Current();
-    mirror::Class* resolved_class;
     StackHandleScope<2> hs(self);
-    resolved_class = class_linker->ResolveType(
-        klass.GetDexFile(),
+    ObjPtr<mirror::Class> resolved_class = class_linker->ResolveType(
         dex::TypeIndex(type_index),
         hs.NewHandle(klass.GetDexCache()),
         hs.NewHandle(klass.GetClassLoader()));
@@ -1401,7 +1394,6 @@
   }
   StackHandleScope<2> hs(Thread::Current());
   ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
-      data.GetDexFile(),
       annotation_value.value_.GetI(),
       hs.NewHandle(data.GetDexCache()),
       hs.NewHandle(data.GetClassLoader()));
@@ -1567,21 +1559,18 @@
     return -2;
   }
 
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  DCHECK(code_item != nullptr) << method->PrettyMethod() << " " << dex_file->GetLocation();
+  CodeItemDebugInfoAccessor accessor(method);
+  DCHECK(accessor.HasCodeItem()) << method->PrettyMethod() << " " << dex_file->GetLocation();
 
   // A method with no line number info should return -1
   DexFile::LineNumFromPcContext context(rel_pc, -1);
-  uint32_t debug_info_offset = OatFile::GetDebugInfoOffset(*dex_file, code_item);
-  dex_file->DecodeDebugPositionInfo(
-      code_item, debug_info_offset, DexFile::LineNumForPcCb, &context);
+  dex_file->DecodeDebugPositionInfo(accessor.DebugInfoOffset(), DexFile::LineNumForPcCb, &context);
   return context.line_num_;
 }
 
 template<bool kTransactionActive>
 void RuntimeEncodedStaticFieldValueIterator::ReadValueToField(ArtField* field) const {
   DCHECK(dex_cache_ != nullptr);
-  DCHECK(class_loader_ != nullptr);
   switch (type_) {
     case kBoolean: field->SetBoolean<kTransactionActive>(field->GetDeclaringClass(), jval_.z);
         break;
@@ -1594,17 +1583,15 @@
     case kDouble:  field->SetDouble<kTransactionActive>(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject<kTransactionActive>(field->GetDeclaringClass(), nullptr); break;
     case kString: {
-      mirror::String* resolved = linker_->ResolveString(dex_file_,
-                                                        dex::StringIndex(jval_.i),
-                                                        *dex_cache_);
+      ObjPtr<mirror::String> resolved = linker_->ResolveString(dex::StringIndex(jval_.i),
+                                                               dex_cache_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
     }
     case kType: {
-      mirror::Class* resolved = linker_->ResolveType(dex_file_,
-                                                     dex::TypeIndex(jval_.i),
-                                                     *dex_cache_,
-                                                     *class_loader_);
+      ObjPtr<mirror::Class> resolved = linker_->ResolveType(dex::TypeIndex(jval_.i),
+                                                            dex_cache_,
+                                                            class_loader_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
     }
diff --git a/runtime/dex_file_annotations.h b/runtime/dex_file_annotations.h
index a934a4f..9ff0929 100644
--- a/runtime/dex_file_annotations.h
+++ b/runtime/dex_file_annotations.h
@@ -19,18 +19,18 @@
 
 #include "dex_file.h"
 
+#include "handle.h"
+#include "mirror/dex_cache.h"
 #include "mirror/object_array.h"
 
 namespace art {
 
 namespace mirror {
 class ClassLoader;
-class DexCache;
 }  // namespace mirror
 class ArtField;
 class ArtMethod;
 class ClassLinker;
-template<class T> class MutableHandle;
 
 namespace annotations {
 
@@ -116,13 +116,12 @@
 class RuntimeEncodedStaticFieldValueIterator : public EncodedStaticFieldValueIterator {
  public:
   // A constructor meant to be called from runtime code.
-  RuntimeEncodedStaticFieldValueIterator(const DexFile& dex_file,
-                                         Handle<mirror::DexCache>* dex_cache,
-                                         Handle<mirror::ClassLoader>* class_loader,
+  RuntimeEncodedStaticFieldValueIterator(Handle<mirror::DexCache> dex_cache,
+                                         Handle<mirror::ClassLoader> class_loader,
                                          ClassLinker* linker,
                                          const DexFile::ClassDef& class_def)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : EncodedStaticFieldValueIterator(dex_file, class_def),
+      : EncodedStaticFieldValueIterator(*dex_cache->GetDexFile(), class_def),
         dex_cache_(dex_cache),
         class_loader_(class_loader),
         linker_(linker) {
@@ -132,9 +131,9 @@
   void ReadValueToField(ArtField* field) const REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  Handle<mirror::DexCache>* const dex_cache_;  // Dex cache to resolve literal objects.
-  Handle<mirror::ClassLoader>* const class_loader_;  // ClassLoader to resolve types.
-  ClassLinker* linker_;  // Linker to resolve literal objects.
+  const Handle<mirror::DexCache> dex_cache_;  // Dex cache to resolve literal objects.
+  const Handle<mirror::ClassLoader> class_loader_;  // ClassLoader to resolve types.
+  ClassLinker* const linker_;  // Linker to resolve literal objects.
   DISALLOW_IMPLICIT_CONSTRUCTORS(RuntimeEncodedStaticFieldValueIterator);
 };
 
diff --git a/runtime/dex_file_layout.h b/runtime/dex_file_layout.h
index 4c960c3..9fac5f8 100644
--- a/runtime/dex_file_layout.h
+++ b/runtime/dex_file_layout.h
@@ -21,7 +21,7 @@
 #include <cstdint>
 #include <iosfwd>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 14c36b4..6905504 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -731,7 +731,14 @@
   const DexFile::ClassDef& class_def = raw->GetClassDef(0);
   const DexFile::CodeItem* code_item = raw->GetCodeItem(raw->FindCodeItemOffset(class_def, 1));
   uint32_t debug_info_offset = raw->GetDebugInfoOffset(code_item);
-  ASSERT_TRUE(raw->DecodeDebugLocalInfo(code_item, debug_info_offset, true, 1, Callback, nullptr));
+  ASSERT_TRUE(raw->DecodeDebugLocalInfo(code_item->registers_size_,
+                                        code_item->ins_size_,
+                                        code_item->insns_size_in_code_units_,
+                                        debug_info_offset,
+                                        true,
+                                        1,
+                                        Callback,
+                                        nullptr));
 }
 
 }  // namespace art
diff --git a/runtime/dex_file_tracking_registrar.cc b/runtime/dex_file_tracking_registrar.cc
index 874d8ea..4de4376 100644
--- a/runtime/dex_file_tracking_registrar.cc
+++ b/runtime/dex_file_tracking_registrar.cc
@@ -19,6 +19,8 @@
 #include <deque>
 #include <tuple>
 
+#include <android-base/logging.h>
+
 // For dex tracking through poisoning. Note: Requires forcing sanitization. This is the reason for
 // the ifdefs and early include.
 #ifdef ART_DEX_FILE_ACCESS_TRACKING
@@ -28,7 +30,6 @@
 #endif
 #include "base/memory_tool.h"
 
-#include "base/logging.h"
 #include "dex_file-inl.h"
 
 namespace art {
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index 4041820..3ced692 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -17,7 +17,8 @@
 #ifndef ART_RUNTIME_DEX_INSTRUCTION_H_
 #define ART_RUNTIME_DEX_INSTRUCTION_H_
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "globals.h"
 
diff --git a/runtime/dex_instruction_iterator.h b/runtime/dex_instruction_iterator.h
index be583a2..eabe009 100644
--- a/runtime/dex_instruction_iterator.h
+++ b/runtime/dex_instruction_iterator.h
@@ -19,8 +19,10 @@
 
 #include <iterator>
 
+#include <android-base/logging.h>
+
 #include "dex_instruction.h"
-#include "base/logging.h"
+#include "base/macros.h"
 
 namespace art {
 
diff --git a/runtime/dex_to_dex_decompiler.cc b/runtime/dex_to_dex_decompiler.cc
index a5ebade..a4e4fb5 100644
--- a/runtime/dex_to_dex_decompiler.cc
+++ b/runtime/dex_to_dex_decompiler.cc
@@ -16,7 +16,9 @@
 
 #include "dex_to_dex_decompiler.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "base/mutex.h"
 #include "bytecode_utils.h"
 #include "dex_file-inl.h"
diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc
index 3c8243a..d93d767 100644
--- a/runtime/dexopt_test.cc
+++ b/runtime/dexopt_test.cc
@@ -218,10 +218,11 @@
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
   ASSERT_TRUE(map.get() != nullptr) << "Failed to build process map";
-  for (BacktraceMap::const_iterator it = map->begin();
+  for (BacktraceMap::iterator it = map->begin();
       reservation_start < reservation_end && it != map->end(); ++it) {
-    ReserveImageSpaceChunk(reservation_start, std::min(it->start, reservation_end));
-    reservation_start = std::max(reservation_start, it->end);
+    const backtrace_map_t* entry = *it;
+    ReserveImageSpaceChunk(reservation_start, std::min(entry->start, reservation_end));
+    reservation_start = std::max(reservation_start, entry->end);
   }
   ReserveImageSpaceChunk(reservation_start, reservation_end);
 }
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index afe4eeb..d057ff3 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -25,7 +25,6 @@
 #include "android-base/strings.h"
 
 #include "arch/instruction_set.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "elf_file_impl.h"
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index 418d937..0cac8e8 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -19,11 +19,11 @@
 
 #include <sys/cdefs.h>
 
+#include <android-base/logging.h>
+
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
 #include "./elf.h"
 
-#include "base/logging.h"
-
 namespace art {
 
 // Architecture dependent flags for the ELF header.
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 8253739..9e50850 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -245,7 +245,7 @@
     *slow_path = true;
     return nullptr;  // Failure
   }
-  mirror::Class* klass = method->GetDexCache()->GetResolvedType(type_idx);
+  ObjPtr<mirror::Class> klass = method->GetDexCache()->GetResolvedType(type_idx);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     klass = class_linker->ResolveType(type_idx, method);
@@ -264,7 +264,7 @@
       return nullptr;  // Failure
     }
   }
-  return klass;
+  return klass.Ptr();
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
@@ -349,8 +349,7 @@
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(method->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(method->GetClassLoader()));
 
-    resolved_field = class_linker->ResolveFieldJLS(*method->GetDexFile(),
-                                                   field_idx,
+    resolved_field = class_linker->ResolveFieldJLS(field_idx,
                                                    h_dex_cache,
                                                    h_class_loader);
   } else {
@@ -500,7 +499,8 @@
       Handle<mirror::Class> h_referring_class(hs2.NewHandle(referrer->GetDeclaringClass()));
       const dex::TypeIndex method_type_idx =
           referrer->GetDexFile()->GetMethodId(method_idx).class_idx_;
-      mirror::Class* method_reference_class = class_linker->ResolveType(method_type_idx, referrer);
+      ObjPtr<mirror::Class> method_reference_class =
+          class_linker->ResolveType(method_type_idx, referrer);
       if (UNLIKELY(method_reference_class == nullptr)) {
         // Bad type idx.
         CHECK(self->IsExceptionPending());
@@ -682,7 +682,7 @@
   } else if (type == kSuper) {
     // TODO This lookup is rather slow.
     dex::TypeIndex method_type_idx = dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
-    ObjPtr<mirror::Class> method_reference_class = ClassLinker::LookupResolvedType(
+    ObjPtr<mirror::Class> method_reference_class = linker->LookupResolvedType(
         method_type_idx, dex_cache, referrer->GetClassLoader());
     if (method_reference_class == nullptr) {
       // Need to do full type resolution...
@@ -711,13 +711,13 @@
   }
 }
 
-inline mirror::Class* ResolveVerifyAndClinit(dex::TypeIndex type_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self,
-                                             bool can_run_clinit,
-                                             bool verify_access) {
+inline ObjPtr<mirror::Class> ResolveVerifyAndClinit(dex::TypeIndex type_idx,
+                                                    ArtMethod* referrer,
+                                                    Thread* self,
+                                                    bool can_run_clinit,
+                                                    bool verify_access) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* klass = class_linker->ResolveType(type_idx, referrer);
+  ObjPtr<mirror::Class> klass = class_linker->ResolveType(type_idx, referrer);
   if (UNLIKELY(klass == nullptr)) {
     CHECK(self->IsExceptionPending());
     return nullptr;  // Failure - Indicate to caller to deliver exception
@@ -748,32 +748,31 @@
   return h_class.Get();
 }
 
-static inline mirror::String* ResolveString(ClassLinker* class_linker,
-                                            dex::StringIndex string_idx,
-                                            ArtMethod* referrer)
+static inline ObjPtr<mirror::String> ResolveString(ClassLinker* class_linker,
+                                                   dex::StringIndex string_idx,
+                                                   ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::String> string = referrer->GetDexCache()->GetResolvedString(string_idx);
   if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
-    string = class_linker->ResolveString(dex_file, string_idx, dex_cache);
+    string = class_linker->ResolveString(string_idx, dex_cache);
   }
-  return string.Ptr();
+  return string;
 }
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx) {
+inline ObjPtr<mirror::String> ResolveStringFromCode(ArtMethod* referrer,
+                                                    dex::StringIndex string_idx) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::String> string = referrer->GetDexCache()->GetResolvedString(string_idx);
   if (UNLIKELY(string == nullptr)) {
     StackHandleScope<1> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
-    const DexFile& dex_file = *dex_cache->GetDexFile();
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    string = class_linker->ResolveString(dex_file, string_idx, dex_cache);
+    string = class_linker->ResolveString(string_idx, dex_cache);
   }
-  return string.Ptr();
+  return string;
 }
 
 inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) {
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 2bf4372..f3450da 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -245,7 +245,7 @@
 CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self, CalleeSaveType type) {
   CallerAndOuterMethod result;
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-  ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
+  ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrameKnownNotTagged();
   auto outer_caller_and_pc = DoGetCalleeSaveMethodOuterCallerAndPc(sp, type);
   result.outer_method = outer_caller_and_pc.first;
   uintptr_t caller_pc = outer_caller_and_pc.second;
@@ -256,7 +256,7 @@
 
 ArtMethod* GetCalleeSaveOuterMethod(Thread* self, CalleeSaveType type) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-  ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
+  ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrameKnownNotTagged();
   return DoGetCalleeSaveMethodOuterCallerAndPc(sp, type).first;
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index cda70ea..830ef84 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -143,15 +143,16 @@
                                  ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-inline mirror::Class* ResolveVerifyAndClinit(dex::TypeIndex type_idx,
-                                             ArtMethod* referrer,
-                                             Thread* self,
-                                             bool can_run_clinit,
-                                             bool verify_access)
+inline ObjPtr<mirror::Class> ResolveVerifyAndClinit(dex::TypeIndex type_idx,
+                                                    ArtMethod* referrer,
+                                                    Thread* self,
+                                                    bool can_run_clinit,
+                                                    bool verify_access)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-inline mirror::String* ResolveStringFromCode(ArtMethod* referrer, dex::StringIndex string_idx)
+inline ObjPtr<mirror::String> ResolveStringFromCode(ArtMethod* referrer,
+                                                    dex::StringIndex string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index 7ec360a..780e221 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
+#include <android-base/logging.h>
+
 #include "art_method-inl.h"
-#include "base/logging.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "java_vm_ext.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 8acaa90..8c90800 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_DEFAULT_INIT_ENTRYPOINTS_H_
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "quick_alloc_entrypoints.h"
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 5f40711..c782c9c 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "base/mutex.h"
 #include "base/systrace.h"
 #include "callee_save_frame.h"
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index f756312..9837838 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -41,6 +41,12 @@
   static_assert(sizeof(GcRoot<mirror::String>) == sizeof(GcRoot<mirror::Object>), "Size check.");
   DCHECK_NE(bss_offset, IndexBssMappingLookup::npos);
   DCHECK_ALIGNED(bss_offset, sizeof(GcRoot<mirror::Object>));
+  if (UNLIKELY(!oat_file->IsExecutable())) {
+    // There are situations where we execute bytecode tied to an oat file opened
+    // as non-executable (i.e. the AOT-compiled code cannot be executed) and we
+    // can JIT that bytecode and get here without the .bss being mmapped.
+    return;
+  }
   GcRoot<mirror::Object>* slot = reinterpret_cast<GcRoot<mirror::Object>*>(
       const_cast<uint8_t*>(oat_file->BssBegin() + bss_offset));
   DCHECK_GE(slot, oat_file->GetBssGcRoots().data());
@@ -132,15 +138,15 @@
   auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(
       self, CalleeSaveType::kSaveEverythingForClinit);
   ArtMethod* caller = caller_and_outer.caller;
-  mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
-                                                 caller,
-                                                 self,
-                                                 /* can_run_clinit */ true,
-                                                 /* verify_access */ false);
+  ObjPtr<mirror::Class> result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
+                                                        caller,
+                                                        self,
+                                                        /* can_run_clinit */ true,
+                                                        /* verify_access */ false);
   if (LIKELY(result != nullptr) && CanReferenceBss(caller_and_outer.outer_method, caller)) {
     StoreTypeInBss(caller_and_outer.outer_method, dex::TypeIndex(type_idx), result);
   }
-  return result;
+  return result.Ptr();
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx, Thread* self)
@@ -150,15 +156,15 @@
   auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(
       self, CalleeSaveType::kSaveEverythingForClinit);
   ArtMethod* caller = caller_and_outer.caller;
-  mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
-                                                 caller,
-                                                 self,
-                                                 /* can_run_clinit */ false,
-                                                 /* verify_access */ false);
+  ObjPtr<mirror::Class> result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
+                                                        caller,
+                                                        self,
+                                                        /* can_run_clinit */ false,
+                                                        /* verify_access */ false);
   if (LIKELY(result != nullptr) && CanReferenceBss(caller_and_outer.outer_method, caller)) {
     StoreTypeInBss(caller_and_outer.outer_method, dex::TypeIndex(type_idx), result);
   }
-  return result;
+  return result.Ptr();
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx, Thread* self)
@@ -168,13 +174,13 @@
   auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
                                                                   CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
-  mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
-                                                 caller,
-                                                 self,
-                                                 /* can_run_clinit */ false,
-                                                 /* verify_access */ true);
+  ObjPtr<mirror::Class> result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx),
+                                                        caller,
+                                                        self,
+                                                        /* can_run_clinit */ false,
+                                                        /* verify_access */ true);
   // Do not StoreTypeInBss(); access check entrypoint is never used together with .bss.
-  return result;
+  return result.Ptr();
 }
 
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
@@ -183,11 +189,11 @@
   auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
                                                                   CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
-  mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
+  ObjPtr<mirror::String> result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr) && CanReferenceBss(caller_and_outer.outer_method, caller)) {
     StoreStringInBss(caller_and_outer.outer_method, dex::StringIndex(string_idx), result);
   }
-  return result;
+  return result.Ptr();
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 29a62c8..b13b6fb 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
+#include <android-base/logging.h>
+
 #include "art_method-inl.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "indirect_reference_table.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 2496aa0..ca5b799 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -31,6 +31,7 @@
 #include "index_bss_mapping.h"
 #include "instrumentation.h"
 #include "interpreter/interpreter.h"
+#include "jit/jit.h"
 #include "linear_alloc.h"
 #include "method_handles.h"
 #include "method_reference.h"
@@ -1249,17 +1250,8 @@
       } else {
         DCHECK_EQ(invoke_type, kSuper);
         CHECK(caller != nullptr) << invoke_type;
-        StackHandleScope<2> hs(self);
-        Handle<mirror::DexCache> dex_cache(
-            hs.NewHandle(caller->GetDeclaringClass()->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(
-            hs.NewHandle(caller->GetDeclaringClass()->GetClassLoader()));
-        // TODO Maybe put this into a mirror::Class function.
         ObjPtr<mirror::Class> ref_class = linker->LookupResolvedType(
-            *dex_cache->GetDexFile(),
-            dex_cache->GetDexFile()->GetMethodId(called_method.index).class_idx_,
-            dex_cache.Get(),
-            class_loader.Get());
+            caller->GetDexFile()->GetMethodId(called_method.index).class_idx_, caller);
         if (ref_class->IsInterface()) {
           called = ref_class->FindVirtualMethodForInterfaceSuper(called, kRuntimePointerSize);
         } else {
@@ -2167,6 +2159,11 @@
   // Note: We cannot walk the stack properly until fixed up below.
   ArtMethod* called = *sp;
   DCHECK(called->IsNative()) << called->PrettyMethod(true);
+  Runtime* runtime = Runtime::Current();
+  jit::Jit* jit = runtime->GetJit();
+  if (jit != nullptr) {
+    jit->AddSamples(self, called, 1u, /*with_backedges*/ false);
+  }
   uint32_t shorty_len = 0;
   const char* shorty = called->GetShorty(&shorty_len);
   bool critical_native = called->IsCriticalNative();
@@ -2188,7 +2185,7 @@
   }
 
   // Fix up managed-stack things in Thread. After this we can walk the stack.
-  self->SetTopOfStack(sp);
+  self->SetTopOfStackTagged(sp);
 
   self->VerifyStack();
 
@@ -2308,6 +2305,7 @@
   // anything that requires a mutator lock before that would cause problems as GC may have the
   // exclusive mutator lock and may be moving objects, etc.
   ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
+  DCHECK(self->GetManagedStack()->GetTopQuickFrameTag());
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   ArtMethod* called = *sp;
   uint32_t cookie = *(sp32 - 1);
@@ -2573,9 +2571,8 @@
   const Instruction& inst = code->InstructionAt(dex_pc);
   DCHECK(inst.Opcode() == Instruction::INVOKE_POLYMORPHIC ||
          inst.Opcode() == Instruction::INVOKE_POLYMORPHIC_RANGE);
-  const DexFile* dex_file = caller_method->GetDexFile();
   const uint32_t proto_idx = inst.VRegH();
-  const char* shorty = dex_file->GetShorty(proto_idx);
+  const char* shorty = caller_method->GetDexFile()->GetShorty(proto_idx);
   const size_t shorty_length = strlen(shorty);
   static const bool kMethodIsStatic = false;  // invoke() and invokeExact() are not static.
   RememberForGcArgumentVisitor gc_visitor(sp, kMethodIsStatic, shorty, shorty_length, &soa);
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 6a4e5b5..f66836f 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -21,6 +21,7 @@
 #include <sys/ucontext.h>
 
 #include "art_method-inl.h"
+#include "base/logging.h"  // For VLOG
 #include "base/safe_copy.h"
 #include "base/stl_util.h"
 #include "dex_file_types.h"
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 3d0e817..e5b5694 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -23,8 +23,9 @@
 #include <memory>
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "atomic.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "mem_map.h"
 #include "stack_reference.h"
diff --git a/runtime/gc/accounting/bitmap-inl.h b/runtime/gc/accounting/bitmap-inl.h
index cd3923a..ca6b479 100644
--- a/runtime/gc/accounting/bitmap-inl.h
+++ b/runtime/gc/accounting/bitmap-inl.h
@@ -21,9 +21,10 @@
 
 #include <memory>
 
+#include <android-base/logging.h>
+
 #include "atomic.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index 6ff5359..5f2f2dd 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 #define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
 
+#include "card_table.h"
+
+#include <android-base/logging.h>
+
 #include "atomic.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
-#include "card_table.h"
 #include "mem_map.h"
 #include "space_bitmap.h"
 
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 01b5896..934e57a 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -18,7 +18,6 @@
 
 #include <sys/mman.h>
 
-#include "base/logging.h"
 #include "base/systrace.h"
 #include "card_table-inl.h"
 #include "gc/heap.h"
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 4237e7e..c997f8d 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -17,8 +17,11 @@
 #ifndef ART_RUNTIME_GC_ACCOUNTING_HEAP_BITMAP_H_
 #define ART_RUNTIME_GC_ACCOUNTING_HEAP_BITMAP_H_
 
+#include <android-base/logging.h>
+
 #include "base/allocator.h"
-#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "space_bitmap.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 1b3d0da..0dd05cd 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -18,6 +18,7 @@
 
 #include <memory>
 
+#include "base/logging.h"  // For VLOG
 #include "base/stl_util.h"
 #include "bitmap-inl.h"
 #include "card_table-inl.h"
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index b37dd96..ba83369 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -21,9 +21,10 @@
 
 #include <memory>
 
+#include <android-base/logging.h>
+
 #include "atomic.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 2257b81..2ee4239 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -18,6 +18,7 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/logging.h"  // For VLOG
 #include "base/stl_util.h"
 #include "obj_ptr-inl.h"
 #include "object_callbacks.h"
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index ef916f8..6506220 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -16,8 +16,9 @@
 
 #include "dlmalloc.h"
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 
 // ART specific morecore implementation defined in space.cc.
 static void* art_heap_morecore(void* m, intptr_t increment);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index b742ac4..928abe8 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -23,6 +23,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/logging.h"  // For VLOG
 #include "base/memory_tool.h"
 #include "base/mutex-inl.h"
 #include "gc/space/memory_tool_settings.h"
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 2c90773..6e5cf0e 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -26,9 +26,10 @@
 #include <unordered_set>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/allocator.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "base/mutex.h"
 #include "globals.h"
 #include "thread.h"
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index c5a341f..fa34270 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -22,7 +22,7 @@
 
 #include "base/dumpable.h"
 #include "base/histogram-inl.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "base/mutex-inl.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
index 1024050..3b59618 100644
--- a/runtime/gc/collector/immune_spaces.cc
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -19,6 +19,7 @@
 #include <tuple>
 #include <vector>
 
+#include "base/logging.h"  // For VLOG.
 #include "gc/space/space-inl.h"
 #include "mirror/object.h"
 #include "oat_file.h"
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index aef98de..34cc129 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -16,7 +16,9 @@
 
 #include "mark_compact.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index c6caf4b..fdfe949 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -25,7 +25,7 @@
 #include "base/bounded_fifo.h"
 #include "base/enums.h"
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "base/systrace.h"
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 9fb37b6..3150781 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -22,7 +22,7 @@
 #include <sstream>
 #include <vector>
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 8712080..d88fcdc 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -15,7 +15,10 @@
  */
 
 #include "gc_cause.h"
-#include "base/logging.h"
+
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "globals.h"
 
 #include <ostream>
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 2047646..52dd104 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -401,8 +401,7 @@
         return true;
       }
       // TODO: Grow for allocation is racy, fix it.
-      VLOG(heap) << "Growing heap from " << PrettySize(max_allowed_footprint_) << " to "
-          << PrettySize(new_footprint) << " for a " << PrettySize(alloc_size) << " allocation";
+      VlogHeapGrowth(max_allowed_footprint_, new_footprint, alloc_size);
       max_allowed_footprint_ = new_footprint;
     }
   }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 9f62666..f7be4c8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -30,6 +30,7 @@
 #include "base/dumpable.h"
 #include "base/file_utils.h"
 #include "base/histogram-inl.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/memory_tool.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -799,12 +800,11 @@
   bool has_waited = false;
   uint64_t wait_start = NanoTime();
   if (thread_flip_running_) {
-    ATRACE_BEGIN("IncrementDisableThreadFlip");
+    ScopedTrace trace("IncrementDisableThreadFlip");
     while (thread_flip_running_) {
       has_waited = true;
       thread_flip_cond_->Wait(self);
     }
-    ATRACE_END();
   }
   ++disable_thread_flip_count_;
   if (has_waited) {
@@ -1796,19 +1796,25 @@
   return GetBytesFreedEver() + GetBytesAllocated();
 }
 
+// Check whether obj's class is h_class or, if use_is_assignable_from, assignable to h_class.
+static bool MatchesClass(mirror::Object* obj,
+                         Handle<mirror::Class> h_class,
+                         bool use_is_assignable_from) REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Class* instance_class = obj->GetClass();
+  CHECK(instance_class != nullptr);
+  ObjPtr<mirror::Class> klass = h_class.Get();
+  if (use_is_assignable_from) {
+    return klass != nullptr && klass->IsAssignableFrom(instance_class);
+  }
+  return instance_class == klass;
+}
+
 void Heap::CountInstances(const std::vector<Handle<mirror::Class>>& classes,
                           bool use_is_assignable_from,
                           uint64_t* counts) {
   auto instance_counter = [&](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Class* instance_class = obj->GetClass();
-    CHECK(instance_class != nullptr);
     for (size_t i = 0; i < classes.size(); ++i) {
-      ObjPtr<mirror::Class> klass = classes[i].Get();
-      if (use_is_assignable_from) {
-        if (klass != nullptr && klass->IsAssignableFrom(instance_class)) {
-          ++counts[i];
-        }
-      } else if (instance_class == klass) {
+      if (MatchesClass(obj, classes[i], use_is_assignable_from)) {
         ++counts[i];
       }
     }
@@ -1818,11 +1824,12 @@
 
 void Heap::GetInstances(VariableSizedHandleScope& scope,
                         Handle<mirror::Class> h_class,
+                        bool use_is_assignable_from,
                         int32_t max_count,
                         std::vector<Handle<mirror::Object>>& instances) {
   DCHECK_GE(max_count, 0);
   auto instance_collector = [&](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (obj->GetClass() == h_class.Get()) {
+    if (MatchesClass(obj, h_class, use_is_assignable_from)) {
       if (max_count == 0 || instances.size() < static_cast<size_t>(max_count)) {
         instances.push_back(scope.NewHandle(obj));
       }
@@ -4149,5 +4156,10 @@
   return verification_.get();
 }
 
+void Heap::VlogHeapGrowth(size_t max_allowed_footprint, size_t new_footprint, size_t alloc_size) {
+  VLOG(heap) << "Growing heap from " << PrettySize(max_allowed_footprint) << " to "
+             << PrettySize(new_footprint) << " for a " << PrettySize(alloc_size) << " allocation";
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 4d7424c..0d11658 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -22,11 +22,14 @@
 #include <unordered_set>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "allocator_type.h"
 #include "arch/instruction_set.h"
 #include "atomic.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "base/mutex.h"
+#include "base/runtime_debug.h"
 #include "base/time_utils.h"
 #include "gc/collector/gc_type.h"
 #include "gc/collector/iteration.h"
@@ -346,9 +349,10 @@
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Implements JDWP RT_Instances.
+  // Implements VMDebug.getInstancesOfClasses and JDWP RT_Instances.
   void GetInstances(VariableSizedHandleScope& scope,
                     Handle<mirror::Class> c,
+                    bool use_is_assignable_from,
                     int32_t max_count,
                     std::vector<Handle<mirror::Object>>& instances)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
@@ -1095,6 +1099,9 @@
 
   void TraceHeapSize(size_t heap_size);
 
+  // Keeps the VLOG code out of heap-inl.h, which is transitively included in half the world.
+  static void VlogHeapGrowth(size_t max_allowed_footprint, size_t new_footprint, size_t alloc_size);
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_);
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 576a35c..a3eef90 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -16,6 +16,7 @@
 
 #include "dlmalloc_space-inl.h"
 
+#include "base/logging.h"  // For VLOG.
 #include "base/time_utils.h"
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 74813b4..bcfc68c 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1587,7 +1587,9 @@
     if (!Runtime::Current()->IsImageDex2OatEnabled()) {
       local_error_msg = "Patching disabled.";
     } else if (secondary_image) {
-      local_error_msg = "Cannot patch a secondary image.";
+      // We really want a working image. Prune and restart.
+      PruneDalvikCache(image_isa);
+      _exit(1);
     } else if (ImageCreationAllowed(is_global_cache, image_isa, &local_error_msg)) {
       bool patch_success =
           RelocateImage(image_location, cache_filename.c_str(), image_isa, &local_error_msg);
diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h
index a0ecb95..6ce81e9 100644
--- a/runtime/gc/space/image_space_fs.h
+++ b/runtime/gc/space/image_space_fs.h
@@ -23,7 +23,7 @@
 #include "android-base/stringprintf.h"
 
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "base/unix_file/fd_file.h"
 #include "globals.h"
diff --git a/runtime/gc/space/image_space_test.cc b/runtime/gc/space/image_space_test.cc
index 429abf3..fcc47d4 100644
--- a/runtime/gc/space/image_space_test.cc
+++ b/runtime/gc/space/image_space_test.cc
@@ -139,11 +139,10 @@
   EXPECT_FALSE(Runtime::Current()->GetHeap()->GetBootImageSpaces().empty());
 }
 
-// Disabled for b/63622587.
-// using ImageSpaceNoDex2oatNoPatchoatTest = ImageSpaceLoadingTest<true, true, false, false>;
-// TEST_F(ImageSpaceNoDex2oatNoPatchoatTest, Test) {
-//   EXPECT_TRUE(Runtime::Current()->GetHeap()->GetBootImageSpaces().empty());
-// }
+using ImageSpaceNoDex2oatNoPatchoatTest = ImageSpaceLoadingTest<true, true, false, false>;
+TEST_F(ImageSpaceNoDex2oatNoPatchoatTest, Test) {
+  EXPECT_TRUE(Runtime::Current()->GetHeap()->GetBootImageSpaces().empty());
+}
 
 using ImageSpaceNoRelocateNoDex2oatNoPatchoatTest = ImageSpaceLoadingTest<true, false, false, false>;
 TEST_F(ImageSpaceNoRelocateNoDex2oatNoPatchoatTest, Test) {
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 45f4f82..d2efb10 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -20,7 +20,9 @@
 
 #include <memory>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "base/memory_tool.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index dcb7837..17274b5 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -18,6 +18,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/logging.h"  // For VLOG
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 5d1f191..3a685cb 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -17,6 +17,7 @@
 
 #include "rosalloc_space-inl.h"
 
+#include "base/logging.h"  // For VLOG.
 #include "base/time_utils.h"
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 74ce273..2c6afa7 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -16,7 +16,9 @@
 
 #include "space.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
diff --git a/runtime/gc/verification.cc b/runtime/gc/verification.cc
index 3cd04a6..d99b377 100644
--- a/runtime/gc/verification.cc
+++ b/runtime/gc/verification.cc
@@ -86,7 +86,7 @@
                                      mirror::Object* ref,
                                      bool fatal) const {
   // Lowest priority logging first:
-  PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
+  PrintFileToLog("/proc/self/maps", android::base::LogSeverity::FATAL_WITHOUT_ABORT);
   MemMap::DumpMaps(LOG_STREAM(FATAL_WITHOUT_ABORT), true);
   // Buffer the output in the string stream since it is more important than the stack traces
   // and we want it to have log priority. The stack traces are printed from Runtime::Abort
diff --git a/runtime/handle.h b/runtime/handle.h
index ccff575..18e503d 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_HANDLE_H_
 #define ART_RUNTIME_HANDLE_H_
 
+#include <android-base/logging.h>
+
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/value_object.h"
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index f248a11..28a2302 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -19,8 +19,9 @@
 
 #include <stack>
 
+#include <android-base/logging.h>
+
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "handle.h"
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 6a1a8c7..f4fc85b 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -36,11 +36,13 @@
 
 #include <set>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
diff --git a/runtime/indenter.h b/runtime/indenter.h
index 69b9732..6361dd2 100644
--- a/runtime/indenter.h
+++ b/runtime/indenter.h
@@ -20,7 +20,8 @@
 #include <ostream>
 #include <streambuf>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 
 namespace art {
diff --git a/runtime/index_bss_mapping.h b/runtime/index_bss_mapping.h
index d9f4e66..dcbc05c 100644
--- a/runtime/index_bss_mapping.h
+++ b/runtime/index_bss_mapping.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_INDEX_BSS_MAPPING_H_
 #define ART_RUNTIME_INDEX_BSS_MAPPING_H_
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 6675099..00184e2 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -23,8 +23,10 @@
 #include <limits>
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index d2d017e..122d1a8 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -1062,7 +1062,7 @@
   // The second parameter is the name to lookup.
   {
     dex::StringIndex name_idx(static_cast<uint32_t>(it.GetJavaValue().i));
-    ObjPtr<mirror::String> name = class_linker->ResolveString(*dex_file, name_idx, dex_cache);
+    ObjPtr<mirror::String> name = class_linker->ResolveString(name_idx, dex_cache);
     if (name.IsNull()) {
       DCHECK(self->IsExceptionPending());
       return nullptr;
@@ -1073,12 +1073,8 @@
 
   // The third parameter is the method type associated with the name.
   uint32_t method_type_idx = static_cast<uint32_t>(it.GetJavaValue().i);
-  Handle<mirror::MethodType>
-      method_type(hs.NewHandle(class_linker->ResolveMethodType(self,
-                                                               *dex_file,
-                                                               method_type_idx,
-                                                               dex_cache,
-                                                               class_loader)));
+  Handle<mirror::MethodType> method_type(hs.NewHandle(
+      class_linker->ResolveMethodType(self, method_type_idx, dex_cache, class_loader)));
   if (method_type.IsNull()) {
     DCHECK(self->IsExceptionPending());
     return nullptr;
@@ -1113,7 +1109,7 @@
       case EncodedArrayValueIterator::ValueType::kMethodType: {
         uint32_t idx = static_cast<uint32_t>(jvalue.i);
         ObjPtr<mirror::MethodType> ref =
-            class_linker->ResolveMethodType(self, *dex_file, idx, dex_cache, class_loader);
+            class_linker->ResolveMethodType(self, idx, dex_cache, class_loader);
         if (ref.IsNull()) {
           DCHECK(self->IsExceptionPending());
           return nullptr;
@@ -1136,7 +1132,7 @@
       }
       case EncodedArrayValueIterator::ValueType::kString: {
         dex::StringIndex idx(static_cast<uint32_t>(jvalue.i));
-        ObjPtr<mirror::String> ref = class_linker->ResolveString(*dex_file, idx, dex_cache);
+        ObjPtr<mirror::String> ref = class_linker->ResolveString(idx, dex_cache);
         if (ref.IsNull()) {
           DCHECK(self->IsExceptionPending());
           return nullptr;
@@ -1147,8 +1143,7 @@
       }
       case EncodedArrayValueIterator::ValueType::kType: {
         dex::TypeIndex idx(static_cast<uint32_t>(jvalue.i));
-        ObjPtr<mirror::Class> ref =
-            class_linker->ResolveType(*dex_file, idx, dex_cache, class_loader);
+        ObjPtr<mirror::Class> ref = class_linker->ResolveType(idx, dex_cache, class_loader);
         if (ref.IsNull()) {
           DCHECK(self->IsExceptionPending());
           return nullptr;
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index f097bc7..269b013 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -26,13 +26,14 @@
 #include <iostream>
 #include <sstream>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "common_dex_operations.h"
 #include "common_throws.h"
@@ -206,17 +207,17 @@
   }
 }
 
-static inline mirror::MethodHandle* ResolveMethodHandle(Thread* self,
-                                                        uint32_t method_handle_index,
-                                                        ArtMethod* referrer)
+static inline ObjPtr<mirror::MethodHandle> ResolveMethodHandle(Thread* self,
+                                                               uint32_t method_handle_index,
+                                                               ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveMethodHandle(self, method_handle_index, referrer);
 }
 
-static inline mirror::MethodType* ResolveMethodType(Thread* self,
-                                                    uint32_t method_type_index,
-                                                    ArtMethod* referrer)
+static inline ObjPtr<mirror::MethodType> ResolveMethodType(Thread* self,
+                                                           uint32_t method_type_index,
+                                                           ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveMethodType(self, method_type_index, referrer);
@@ -348,9 +349,7 @@
   if (UNLIKELY(string_ptr == nullptr)) {
     StackHandleScope<1> hs(self);
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-    string_ptr = Runtime::Current()->GetClassLinker()->ResolveString(*dex_cache->GetDexFile(),
-                                                                     string_idx,
-                                                                     dex_cache);
+    string_ptr = Runtime::Current()->GetClassLinker()->ResolveString(string_idx, dex_cache);
   }
   return string_ptr;
 }
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index de617a9..df4bcc6 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -23,7 +23,7 @@
 /*
  * On entry:
  *  r0  Thread* self/
- *  r1  code_item
+ *  r1  insns_
  *  r2  ShadowFrame
  *  r3  JValue* result_register
  *
@@ -56,6 +56,7 @@
     VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, r0, lsl #1                    @ Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/arm/header.S b/runtime/interpreter/mterp/arm/header.S
index 51c2ba4..64ab9ef 100644
--- a/runtime/interpreter/mterp/arm/header.S
+++ b/runtime/interpreter/mterp/arm/header.S
@@ -85,6 +85,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index f3d40ff..8d61210 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -20,7 +20,7 @@
  * Interpreter entry point.
  * On entry:
  *  x0  Thread* self/
- *  x1  code_item
+ *  x1  insns_
  *  x2  ShadowFrame
  *  x3  JValue* result_register
  *
@@ -46,6 +46,7 @@
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, w0, lsl #1                    // Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, xPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 47f12d2..9261b77 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -87,6 +87,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
diff --git a/runtime/interpreter/mterp/cfi_asm_support.h b/runtime/interpreter/mterp/cfi_asm_support.h
new file mode 100644
index 0000000..a97e153
--- /dev/null
+++ b/runtime/interpreter/mterp/cfi_asm_support.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_MTERP_CFI_ASM_SUPPORT_H_
+#define ART_RUNTIME_INTERPRETER_MTERP_CFI_ASM_SUPPORT_H_
+
+/*
+ * To keep track of the Dalvik PC, assign it a magic register number that
+ * won't be confused with a physical register.  Then, standard .cfi directives
+ * will track the location of it so that it may be extracted during a stack
+ * unwind.
+ *
+ * The Dalvik PC will be in either a physical register, or the frame.
+ * Encoded from the ASCII string " DEX" -> 0x20 0x44 0x45 0x58
+ */
+#define DPC_PSEUDO_REG 0x20444558
+
+#endif  // ART_RUNTIME_INTERPRETER_MTERP_CFI_ASM_SUPPORT_H_
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
index 03de985..41b5d56 100644
--- a/runtime/interpreter/mterp/mips/entry.S
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -32,6 +32,7 @@
  */
 
 ExecuteMterpImpl:
+    .cfi_startproc
     .set noreorder
     .cpload t9
     .set reorder
@@ -53,6 +54,7 @@
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     EAS1(rPC, a1, a0)                             # Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, rPC
 
     EXPORT_PC()
 
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
index 6e1ba1c..1c784ef 100644
--- a/runtime/interpreter/mterp/mips/footer.S
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -284,4 +284,5 @@
     STACK_LOAD_FULL()
     jalr    zero, ra
 
+    .cfi_endproc
     .end ExecuteMterpImpl
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index e4552dd..0f7a6f1 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -32,6 +32,7 @@
  */
 
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #if (__mips==32) && (__mips_isa_rev>=2)
 #define MIPS32REVGE2    /* mips32r2 and greater */
diff --git a/runtime/interpreter/mterp/mips64/entry.S b/runtime/interpreter/mterp/mips64/entry.S
index 436b88d..841a817 100644
--- a/runtime/interpreter/mterp/mips64/entry.S
+++ b/runtime/interpreter/mterp/mips64/entry.S
@@ -73,6 +73,7 @@
     dlsa    rREFS, v0, rFP, 2
     lw      v0, SHADOWFRAME_DEX_PC_OFFSET(a2)
     dlsa    rPC, v0, a1, 1
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/mips64/header.S b/runtime/interpreter/mterp/mips64/header.S
index d1acefd..2b550cb 100644
--- a/runtime/interpreter/mterp/mips64/header.S
+++ b/runtime/interpreter/mterp/mips64/header.S
@@ -102,6 +102,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 92dd19e..9c7645a 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -151,8 +151,14 @@
       Dbg::IsDebuggerActive() ||
       // An async exception has been thrown. We need to go to the switch interpreter. MTerp doesn't
       // know how to deal with these so we could end up never dealing with it if we are in an
-      // infinite loop.
-      UNLIKELY(Thread::Current()->IsAsyncExceptionPending());
+      // infinite loop. Since this can be called in a tight loop and getting the current thread
+      // requires a TLS read we instead first check a short-circuit runtime flag that will only be
+      // set if something tries to set an async exception. This will make this function faster in
+      // the common case where no async exception has ever been sent. We don't need to worry about
+      // synchronization on the runtime flag since it is only set in a checkpoint which will either
+      // take place on the current thread or act as a synchronization point.
+      (UNLIKELY(runtime->AreAsyncExceptionsThrown()) &&
+       Thread::Current()->IsAsyncExceptionPending());
 }
 
 
@@ -370,15 +376,15 @@
                                   ShadowFrame* shadow_frame,
                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Class* c = ResolveVerifyAndClinit(dex::TypeIndex(index),
-                                            shadow_frame->GetMethod(),
-                                            self,
-                                            false,
-                                            false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(index),
+                                                   shadow_frame->GetMethod(),
+                                                   self,
+                                                   /* can_run_clinit */ false,
+                                                   /* verify_access */ false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
-  shadow_frame->SetVRegReference(tgt_vreg, c);
+  shadow_frame->SetVRegReference(tgt_vreg, c.Ptr());
   return false;
 }
 
@@ -457,17 +463,17 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
   mirror::Object* obj = nullptr;
-  mirror::Class* c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
-                                            shadow_frame->GetMethod(),
-                                            self,
-                                            false,
-                                            false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(dex::TypeIndex(inst->VRegB_21c()),
+                                                   shadow_frame->GetMethod(),
+                                                   self,
+                                                   /* can_run_clinit */ false,
+                                                   /* verify_access */ false);
   if (LIKELY(c != nullptr)) {
     if (UNLIKELY(c->IsStringClass())) {
       gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
       obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
     } else {
-      obj = AllocObjectFromCode<true>(c,
+      obj = AllocObjectFromCode<true>(c.Ptr(),
                                       self,
                                       Runtime::Current()->GetHeap()->GetCurrentAllocator());
     }
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 69d7edb..f3c1124 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -92,6 +92,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -341,7 +342,7 @@
 /*
  * On entry:
  *  r0  Thread* self/
- *  r1  code_item
+ *  r1  insns_
  *  r2  ShadowFrame
  *  r3  JValue* result_register
  *
@@ -374,6 +375,7 @@
     VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, r0, lsl #1                    @ Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 82edab4..347d54f 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -94,6 +94,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #define MTERP_PROFILE_BRANCHES 1
 #define MTERP_LOGGING 0
@@ -378,7 +379,7 @@
  * Interpreter entry point.
  * On entry:
  *  x0  Thread* self/
- *  x1  code_item
+ *  x1  insns_
  *  x2  ShadowFrame
  *  x3  JValue* result_register
  *
@@ -404,6 +405,7 @@
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, w0, lsl #1                    // Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, xPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 8cc1b19..1687afa 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -39,6 +39,7 @@
  */
 
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 #if (__mips==32) && (__mips_isa_rev>=2)
 #define MIPS32REVGE2    /* mips32r2 and greater */
@@ -765,6 +766,7 @@
  */
 
 ExecuteMterpImpl:
+    .cfi_startproc
     .set noreorder
     .cpload t9
     .set reorder
@@ -786,6 +788,7 @@
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     EAS1(rPC, a1, a0)                             # Create direct pointer to 1st dex opcode
+    .cfi_register DPC_PSEUDO_REG, rPC
 
     EXPORT_PC()
 
@@ -12842,5 +12845,6 @@
     STACK_LOAD_FULL()
     jalr    zero, ra
 
+    .cfi_endproc
     .end ExecuteMterpImpl
 
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 139ee25..559c72b 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -109,6 +109,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
@@ -407,6 +408,7 @@
     dlsa    rREFS, v0, rFP, 2
     lw      v0, SHADOWFRAME_DEX_PC_OFFSET(a2)
     dlsa    rPC, v0, a1, 1
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index cbab61e..0613c9d 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -95,6 +95,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Handle mac compiler specific
@@ -342,7 +343,7 @@
 /*
  * On entry:
  *  0  Thread* self
- *  1  code_item
+ *  1  insns_
  *  2  ShadowFrame
  *  3  JValue* result_register
  *
@@ -379,6 +380,7 @@
     leal    (rFP, %eax, 4), rREFS
     movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
     lea     (%ecx, %eax, 2), rPC
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Set up for backwards branches & osr profiling */
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 83c3e4f..aa91db3 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -91,6 +91,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Handle mac compiler specific
@@ -328,7 +329,7 @@
 /*
  * On entry:
  *  0  Thread* self
- *  1  code_item
+ *  1  insns_
  *  2  ShadowFrame
  *  3  JValue* result_register
  *
@@ -362,6 +363,7 @@
     leaq    (rFP, %rax, 4), rREFS
     movl    SHADOWFRAME_DEX_PC_OFFSET(IN_ARG2), %eax
     leaq    (IN_ARG1, %rax, 2), rPC
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/x86/entry.S b/runtime/interpreter/mterp/x86/entry.S
index 055e834..10ca836 100644
--- a/runtime/interpreter/mterp/x86/entry.S
+++ b/runtime/interpreter/mterp/x86/entry.S
@@ -24,7 +24,7 @@
 /*
  * On entry:
  *  0  Thread* self
- *  1  code_item
+ *  1  insns_
  *  2  ShadowFrame
  *  3  JValue* result_register
  *
@@ -61,6 +61,7 @@
     leal    (rFP, %eax, 4), rREFS
     movl    SHADOWFRAME_DEX_PC_OFFSET(%edx), %eax
     lea     (%ecx, %eax, 2), rPC
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Set up for backwards branches & osr profiling */
diff --git a/runtime/interpreter/mterp/x86/header.S b/runtime/interpreter/mterp/x86/header.S
index 370012f..0e585e8 100644
--- a/runtime/interpreter/mterp/x86/header.S
+++ b/runtime/interpreter/mterp/x86/header.S
@@ -88,6 +88,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Handle mac compiler specific
diff --git a/runtime/interpreter/mterp/x86_64/entry.S b/runtime/interpreter/mterp/x86_64/entry.S
index 83b845b..d85ef7f 100644
--- a/runtime/interpreter/mterp/x86_64/entry.S
+++ b/runtime/interpreter/mterp/x86_64/entry.S
@@ -24,7 +24,7 @@
 /*
  * On entry:
  *  0  Thread* self
- *  1  code_item
+ *  1  insns_
  *  2  ShadowFrame
  *  3  JValue* result_register
  *
@@ -58,6 +58,7 @@
     leaq    (rFP, %rax, 4), rREFS
     movl    SHADOWFRAME_DEX_PC_OFFSET(IN_ARG2), %eax
     leaq    (IN_ARG1, %rax, 2), rPC
+    .cfi_register DPC_PSEUDO_REG, rPC
     EXPORT_PC
 
     /* Starting ibase */
diff --git a/runtime/interpreter/mterp/x86_64/header.S b/runtime/interpreter/mterp/x86_64/header.S
index 9d21f3f..a3ef895 100644
--- a/runtime/interpreter/mterp/x86_64/header.S
+++ b/runtime/interpreter/mterp/x86_64/header.S
@@ -84,6 +84,7 @@
  * to expand the macros into assembler assignment statements.
  */
 #include "asm_support.h"
+#include "interpreter/mterp/cfi_asm_support.h"
 
 /*
  * Handle mac compiler specific
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 31e7986..dece830 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -26,12 +26,12 @@
 #include <locale>
 #include <unordered_map>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "art_method-inl.h"
 #include "base/casts.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker.h"
 #include "common_throws.h"
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index aeeda1e..d712b10 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_JDWP_JDWP_H_
 
 #include "atomic.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex.h"
 #include "jdwp/jdwp_bits.h"
 #include "jdwp/jdwp_constants.h"
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index ede4f9e..d68430f 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -22,7 +22,7 @@
 
 #include "android-base/stringprintf.h"
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "jdwp/jdwp_priv.h"
 #include "thread-current-inl.h"
 
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 41cb642..9409b76 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -25,7 +25,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "debugger.h"
 #include "jdwp/jdwp_constants.h"
 #include "jdwp/jdwp_expand_buf.h"
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index f0b8c91..4b4ca0e 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -23,7 +23,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "jdwp/jdwp.h"
 #include "jdwp/jdwp_bits.h"
 
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 618332b..89eef88 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -24,7 +24,7 @@
 
 #include "atomic.h"
 #include "base/hex_dump.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "debugger.h"
 #include "jdwp/jdwp_constants.h"
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index e6c6068..e275554 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -23,7 +23,7 @@
 #include "android-base/stringprintf.h"
 
 #include "atomic.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/time_utils.h"
 #include "debugger.h"
 #include "jdwp/jdwp_priv.h"
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index 97662f0..673a942 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -28,7 +28,7 @@
 
 #include "android-base/stringprintf.h"
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "jdwp/jdwp_priv.h"
 
 namespace art {
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 135d9b1..4d1c85a 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -16,7 +16,8 @@
 
 #include "debugger_interface.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/mutex.h"
 #include "thread-current-inl.h"
 #include "thread.h"
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 953e195..278bc57 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -20,8 +20,9 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/memory_tool.h"
+#include "base/runtime_debug.h"
 #include "debugger.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
@@ -643,10 +644,14 @@
     return;
   }
 
-  if (method->IsClassInitializer() || method->IsNative() || !method->IsCompilable()) {
+  if (method->IsClassInitializer() || !method->IsCompilable()) {
     // We do not want to compile such methods.
     return;
   }
+  if (hot_method_threshold_ == 0) {
+    // Tests might request JIT on first use (compiled synchronously in the interpreter).
+    return;
+  }
   DCHECK(thread_pool_ != nullptr);
   DCHECK_GT(warm_method_threshold_, 0);
   DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
@@ -659,7 +664,8 @@
     count *= priority_thread_weight_;
   }
   int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
-  if (starting_count < warm_method_threshold_) {
+  // Note: Native methods have no "warm" state or profiling info.
+  if (LIKELY(!method->IsNative()) && starting_count < warm_method_threshold_) {
     if ((new_count >= warm_method_threshold_) &&
         (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
       bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
@@ -696,6 +702,7 @@
         // If the samples don't contain any back edge, we don't increment the hotness.
         return;
       }
+      DCHECK(!method->IsNative());  // No back edges reported for native methods.
       if ((new_count >= osr_method_threshold_) &&  !code_cache_->IsOsrCompiled(method)) {
         DCHECK(thread_pool_ != nullptr);
         thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 3220513..6f03a68 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -21,6 +21,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
@@ -55,6 +56,107 @@
 static constexpr size_t kCodeSizeLogThreshold = 50 * KB;
 static constexpr size_t kStackMapSizeLogThreshold = 50 * KB;
 
+class JitCodeCache::JniStubKey {
+ public:
+  explicit JniStubKey(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_)
+      : shorty_(method->GetShorty()),
+        is_static_(method->IsStatic()),
+        is_fast_native_(method->IsFastNative()),
+        is_critical_native_(method->IsCriticalNative()),
+        is_synchronized_(method->IsSynchronized()) {
+    DCHECK(!(is_fast_native_ && is_critical_native_));
+  }
+
+  bool operator<(const JniStubKey& rhs) const {
+    if (is_static_ != rhs.is_static_) {
+      return rhs.is_static_;
+    }
+    if (is_synchronized_ != rhs.is_synchronized_) {
+      return rhs.is_synchronized_;
+    }
+    if (is_fast_native_ != rhs.is_fast_native_) {
+      return rhs.is_fast_native_;
+    }
+    if (is_critical_native_ != rhs.is_critical_native_) {
+      return rhs.is_critical_native_;
+    }
+    return strcmp(shorty_, rhs.shorty_) < 0;
+  }
+
+  // Update the shorty to point to another method's shorty. Call this function when removing
+  // the method that references the old shorty from JniStubData and not removing the entire
+  // JniStubData; the old shorty may become a dangling pointer when that method is unloaded.
+  void UpdateShorty(ArtMethod* method) const REQUIRES_SHARED(Locks::mutator_lock_) {
+    const char* shorty = method->GetShorty();
+    DCHECK_STREQ(shorty_, shorty);
+    shorty_ = shorty;
+  }
+
+ private:
+  // The shorty points to DexFile data and may need to change
+  // to point to the same shorty in a different DexFile.
+  mutable const char* shorty_;
+
+  const bool is_static_;
+  const bool is_fast_native_;
+  const bool is_critical_native_;
+  const bool is_synchronized_;
+};
+
+class JitCodeCache::JniStubData {
+ public:
+  JniStubData() : code_(nullptr), methods_() {}
+
+  void SetCode(const void* code) {
+    DCHECK(code != nullptr);
+    code_ = code;
+  }
+
+  const void* GetCode() const {
+    return code_;
+  }
+
+  bool IsCompiled() const {
+    return GetCode() != nullptr;
+  }
+
+  void AddMethod(ArtMethod* method) {
+    if (!ContainsElement(methods_, method)) {
+      methods_.push_back(method);
+    }
+  }
+
+  const std::vector<ArtMethod*>& GetMethods() const {
+    return methods_;
+  }
+
+  void RemoveMethodsIn(const LinearAlloc& alloc) {
+    auto kept_end = std::remove_if(
+        methods_.begin(),
+        methods_.end(),
+        [&alloc](ArtMethod* method) { return alloc.ContainsUnsafe(method); });
+    methods_.erase(kept_end, methods_.end());
+  }
+
+  bool RemoveMethod(ArtMethod* method) {
+    auto it = std::find(methods_.begin(), methods_.end(), method);
+    if (it != methods_.end()) {
+      methods_.erase(it);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  void MoveObsoleteMethod(ArtMethod* old_method, ArtMethod* new_method) {
+    std::replace(methods_.begin(), methods_.end(), old_method, new_method);
+  }
+
+ private:
+  const void* code_;
+  std::vector<ArtMethod*> methods_;
+};
+
 JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
                                    size_t max_capacity,
                                    bool generate_debug_info,
@@ -193,14 +295,36 @@
 
 bool JitCodeCache::ContainsMethod(ArtMethod* method) {
   MutexLock mu(Thread::Current(), lock_);
-  for (auto& it : method_code_map_) {
-    if (it.second == method) {
+  if (UNLIKELY(method->IsNative())) {
+    auto it = jni_stubs_map_.find(JniStubKey(method));
+    if (it != jni_stubs_map_.end() &&
+        it->second.IsCompiled() &&
+        ContainsElement(it->second.GetMethods(), method)) {
       return true;
     }
+  } else {
+    for (const auto& it : method_code_map_) {
+      if (it.second == method) {
+        return true;
+      }
+    }
   }
   return false;
 }
 
+const void* JitCodeCache::GetJniStubCode(ArtMethod* method) {
+  DCHECK(method->IsNative());
+  MutexLock mu(Thread::Current(), lock_);
+  auto it = jni_stubs_map_.find(JniStubKey(method));
+  if (it != jni_stubs_map_.end()) {
+    JniStubData& data = it->second;
+    if (data.IsCompiled() && ContainsElement(data.GetMethods(), method)) {
+      return data.GetCode();
+    }
+  }
+  return nullptr;
+}
+
 class ScopedCodeCacheWrite : ScopedTrace {
  public:
   explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false)
@@ -426,7 +550,9 @@
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
   DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr));
-  FreeData(GetRootTable(code_ptr));
+  if (OatQuickMethodHeader::FromCodePointer(code_ptr)->IsOptimized()) {
+    FreeData(GetRootTable(code_ptr));
+  }  // else this is a JNI stub without any data.
   FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
@@ -463,6 +589,16 @@
     // lead to a deadlock.
     {
       ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = jni_stubs_map_.begin(); it != jni_stubs_map_.end();) {
+        it->second.RemoveMethodsIn(alloc);
+        if (it->second.GetMethods().empty()) {
+          method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->second.GetCode()));
+          it = jni_stubs_map_.erase(it);
+        } else {
+          it->first.UpdateShorty(it->second.GetMethods().front());
+          ++it;
+        }
+      }
       for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
         if (alloc.ContainsUnsafe(it->second)) {
           method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
@@ -572,7 +708,8 @@
                                           bool has_should_deoptimize_flag,
                                           const ArenaSet<ArtMethod*>&
                                               cha_single_implementation_list) {
-  DCHECK(stack_map != nullptr);
+  DCHECK_NE(stack_map != nullptr, method->IsNative());
+  DCHECK(!method->IsNative() || !osr);
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
   size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -596,8 +733,8 @@
       std::copy(code, code + code_size, code_ptr);
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
-          code_ptr - stack_map,
-          code_ptr - method_info,
+          (stack_map != nullptr) ? code_ptr - stack_map : 0u,
+          (method_info != nullptr) ? code_ptr - method_info : 0u,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
@@ -652,24 +789,40 @@
     // possible that the compiled code is considered invalidated by some class linking,
     // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
-    // Fill the root table before updating the entry point.
-    DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
-    DCHECK_LE(roots_data, stack_map);
-    FillRootTable(roots_data, roots);
-    {
-      // Flush data cache, as compiled code references literals in it.
-      // We also need a TLB shootdown to act as memory barrier across cores.
-      ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
-      FlushDataCache(reinterpret_cast<char*>(roots_data),
-                     reinterpret_cast<char*>(roots_data + data_size));
-    }
-    method_code_map_.Put(code_ptr, method);
-    if (osr) {
-      number_of_osr_compilations_++;
-      osr_code_map_.Put(method, code_ptr);
+    if (UNLIKELY(method->IsNative())) {
+      DCHECK(stack_map == nullptr);
+      DCHECK(roots_data == nullptr);
+      auto it = jni_stubs_map_.find(JniStubKey(method));
+      DCHECK(it != jni_stubs_map_.end())
+          << "Entry inserted in NotifyCompilationOf() should be alive.";
+      JniStubData* data = &it->second;
+      DCHECK(ContainsElement(data->GetMethods(), method))
+          << "Entry inserted in NotifyCompilationOf() should contain this method.";
+      data->SetCode(code_ptr);
+      instrumentation::Instrumentation* instrum = Runtime::Current()->GetInstrumentation();
+      for (ArtMethod* m : data->GetMethods()) {
+        instrum->UpdateMethodsCode(m, method_header->GetEntryPoint());
+      }
     } else {
-      Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
-          method, method_header->GetEntryPoint());
+      // Fill the root table before updating the entry point.
+      DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
+      DCHECK_LE(roots_data, stack_map);
+      FillRootTable(roots_data, roots);
+      {
+        // Flush data cache, as compiled code references literals in it.
+        // We also need a TLB shootdown to act as memory barrier across cores.
+        ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
+        FlushDataCache(reinterpret_cast<char*>(roots_data),
+                       reinterpret_cast<char*>(roots_data + data_size));
+      }
+      method_code_map_.Put(code_ptr, method);
+      if (osr) {
+        number_of_osr_compilations_++;
+        osr_code_map_.Put(method, code_ptr);
+      } else {
+        Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+            method, method_header->GetEntryPoint());
+      }
     }
     if (collection_in_progress_) {
       // We need to update the live bitmap if there is a GC to ensure it sees this new
@@ -703,45 +856,18 @@
 }
 
 bool JitCodeCache::RemoveMethod(ArtMethod* method, bool release_memory) {
+  // This function is used only for testing and only with non-native methods.
+  CHECK(!method->IsNative());
+
   MutexLock mu(Thread::Current(), lock_);
-  if (method->IsNative()) {
-    return false;
-  }
 
-  bool in_cache = false;
-  {
-    ScopedCodeCacheWrite ccw(code_map_.get());
-    for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
-      if (code_iter->second == method) {
-        if (release_memory) {
-          FreeCode(code_iter->first);
-        }
-        code_iter = method_code_map_.erase(code_iter);
-        in_cache = true;
-        continue;
-      }
-      ++code_iter;
-    }
-  }
-
-  bool osr = false;
-  auto code_map = osr_code_map_.find(method);
-  if (code_map != osr_code_map_.end()) {
-    osr_code_map_.erase(code_map);
-    osr = true;
-  }
+  bool osr = osr_code_map_.find(method) != osr_code_map_.end();
+  bool in_cache = RemoveMethodLocked(method, release_memory);
 
   if (!in_cache) {
     return false;
   }
 
-  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
-  if (info != nullptr) {
-    auto profile = std::find(profiling_infos_.begin(), profiling_infos_.end(), info);
-    DCHECK(profile != profiling_infos_.end());
-    profiling_infos_.erase(profile);
-  }
-  method->SetProfilingInfo(nullptr);
   method->ClearCounter();
   Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
       method, GetQuickToInterpreterBridge());
@@ -753,34 +879,58 @@
   return true;
 }
 
+bool JitCodeCache::RemoveMethodLocked(ArtMethod* method, bool release_memory) {
+  if (LIKELY(!method->IsNative())) {
+    ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+    if (info != nullptr) {
+      RemoveElement(profiling_infos_, info);
+    }
+    method->SetProfilingInfo(nullptr);
+  }
+
+  bool in_cache = false;
+  ScopedCodeCacheWrite ccw(code_map_.get());
+  if (UNLIKELY(method->IsNative())) {
+    auto it = jni_stubs_map_.find(JniStubKey(method));
+    if (it != jni_stubs_map_.end() && it->second.RemoveMethod(method)) {
+      in_cache = true;
+      if (it->second.GetMethods().empty()) {
+        if (release_memory) {
+          FreeCode(it->second.GetCode());
+        }
+        jni_stubs_map_.erase(it);
+      } else {
+        it->first.UpdateShorty(it->second.GetMethods().front());
+      }
+    }
+  } else {
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      if (it->second == method) {
+        in_cache = true;
+        if (release_memory) {
+          FreeCode(it->first);
+        }
+        it = method_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+
+    auto osr_it = osr_code_map_.find(method);
+    if (osr_it != osr_code_map_.end()) {
+      osr_code_map_.erase(osr_it);
+    }
+  }
+
+  return in_cache;
+}
+
 // This notifies the code cache that the given method has been redefined and that it should remove
 // any cached information it has on the method. All threads must be suspended before calling this
 // method. The compiled code for the method (if there is any) must not be in any threads call stack.
 void JitCodeCache::NotifyMethodRedefined(ArtMethod* method) {
   MutexLock mu(Thread::Current(), lock_);
-  if (method->IsNative()) {
-    return;
-  }
-  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
-  if (info != nullptr) {
-    auto profile = std::find(profiling_infos_.begin(), profiling_infos_.end(), info);
-    DCHECK(profile != profiling_infos_.end());
-    profiling_infos_.erase(profile);
-  }
-  method->SetProfilingInfo(nullptr);
-  ScopedCodeCacheWrite ccw(code_map_.get());
-  for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) {
-    if (code_iter->second == method) {
-      FreeCode(code_iter->first);
-      code_iter = method_code_map_.erase(code_iter);
-      continue;
-    }
-    ++code_iter;
-  }
-  auto code_map = osr_code_map_.find(method);
-  if (code_map != osr_code_map_.end()) {
-    osr_code_map_.erase(code_map);
-  }
+  RemoveMethodLocked(method, /* release_memory */ true);
 }
 
 // This invalidates old_method. Once this function returns one can no longer use old_method to
@@ -790,11 +940,15 @@
 // shouldn't be used since it is no longer logically in the jit code cache.
 // TODO We should add DCHECKS that validate that the JIT is paused when this method is entered.
 void JitCodeCache::MoveObsoleteMethod(ArtMethod* old_method, ArtMethod* new_method) {
-  // Native methods have no profiling info and need no special handling from the JIT code cache.
+  MutexLock mu(Thread::Current(), lock_);
   if (old_method->IsNative()) {
+    // Update methods in jni_stubs_map_.
+    for (auto& entry : jni_stubs_map_) {
+      JniStubData& data = entry.second;
+      data.MoveObsoleteMethod(old_method, new_method);
+    }
     return;
   }
-  MutexLock mu(Thread::Current(), lock_);
   // Update ProfilingInfo to the new one and remove it from the old_method.
   if (old_method->GetProfilingInfo(kRuntimePointerSize) != nullptr) {
     DCHECK_EQ(old_method->GetProfilingInfo(kRuntimePointerSize)->GetMethod(), old_method);
@@ -936,7 +1090,7 @@
         // its stack frame, it is not the method owning return_pc_. We just pass null to
         // LookupMethodHeader: the method is only checked against in debug builds.
         OatQuickMethodHeader* method_header =
-            code_cache_->LookupMethodHeader(frame.return_pc_, nullptr);
+            code_cache_->LookupMethodHeader(frame.return_pc_, /* method */ nullptr);
         if (method_header != nullptr) {
           const void* code = method_header->GetCode();
           CHECK(code_cache_->GetLiveBitmap()->Test(FromCodeToAllocation(code)));
@@ -1089,7 +1243,7 @@
           const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
           if (ContainsPc(entry_point)) {
             info->SetSavedEntryPoint(entry_point);
-            // Don't call Instrumentation::UpdateMethods, as it can check the declaring
+            // Don't call Instrumentation::UpdateMethodsCode(), as it can check the declaring
             // class of the method. We may be concurrently running a GC which makes accessing
             // the class unsafe. We know it is OK to bypass the instrumentation as we've just
             // checked that the current entry point is JIT compiled code.
@@ -1098,6 +1252,25 @@
         }
 
         DCHECK(CheckLiveCompiledCodeHasProfilingInfo());
+
+        // Change entry points of native methods back to the GenericJNI entrypoint.
+        for (const auto& entry : jni_stubs_map_) {
+          const JniStubData& data = entry.second;
+          if (!data.IsCompiled()) {
+            continue;
+          }
+          // Make sure a single invocation of the GenericJNI trampoline tries to recompile.
+          uint16_t new_counter = Runtime::Current()->GetJit()->HotMethodThreshold() - 1u;
+          const OatQuickMethodHeader* method_header =
+              OatQuickMethodHeader::FromCodePointer(data.GetCode());
+          for (ArtMethod* method : data.GetMethods()) {
+            if (method->GetEntryPointFromQuickCompiledCode() == method_header->GetEntryPoint()) {
+              // Don't call Instrumentation::UpdateMethodsCode(), same as for normal methods above.
+              method->SetCounter(new_counter);
+              method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniStub());
+            }
+          }
+        }
       }
       live_bitmap_.reset(nullptr);
       NotifyCollectionDone(self);
@@ -1113,13 +1286,22 @@
     MutexLock mu(self, lock_);
     ScopedCodeCacheWrite scc(code_map_.get());
     // Iterate over all compiled code and remove entries that are not marked.
+    for (auto it = jni_stubs_map_.begin(); it != jni_stubs_map_.end();) {
+      JniStubData* data = &it->second;
+      if (!data->IsCompiled() || GetLiveBitmap()->Test(FromCodeToAllocation(data->GetCode()))) {
+        ++it;
+      } else {
+        method_headers.insert(OatQuickMethodHeader::FromCodePointer(data->GetCode()));
+        it = jni_stubs_map_.erase(it);
+      }
+    }
     for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
       const void* code_ptr = it->first;
       uintptr_t allocation = FromCodeToAllocation(code_ptr);
       if (GetLiveBitmap()->Test(allocation)) {
         ++it;
       } else {
-        method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+        method_headers.insert(OatQuickMethodHeader::FromCodePointer(code_ptr));
         it = method_code_map_.erase(it);
       }
     }
@@ -1158,6 +1340,17 @@
     // an entry point is either:
     // - an osr compiled code, that will be removed if not in a thread call stack.
     // - discarded compiled code, that will be removed if not in a thread call stack.
+    for (const auto& entry : jni_stubs_map_) {
+      const JniStubData& data = entry.second;
+      const void* code_ptr = data.GetCode();
+      const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      for (ArtMethod* method : data.GetMethods()) {
+        if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) {
+          GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+          break;
+        }
+      }
+    }
     for (const auto& it : method_code_map_) {
       ArtMethod* method = it.second;
       const void* code_ptr = it.first;
@@ -1237,19 +1430,51 @@
     return nullptr;
   }
 
-  MutexLock mu(Thread::Current(), lock_);
-  if (method_code_map_.empty()) {
-    return nullptr;
+  if (!kIsDebugBuild) {
+    // A null `method` is passed only by MarkCodeClosure::Run(), and only in debug builds.
+    CHECK(method != nullptr);
   }
-  auto it = method_code_map_.lower_bound(reinterpret_cast<const void*>(pc));
-  --it;
 
-  const void* code_ptr = it->first;
-  OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-  if (!method_header->Contains(pc)) {
-    return nullptr;
+  MutexLock mu(Thread::Current(), lock_);
+  OatQuickMethodHeader* method_header = nullptr;
+  ArtMethod* found_method = nullptr;  // Only for DCHECK(), not for JNI stubs.
+  if (method != nullptr && UNLIKELY(method->IsNative())) {
+    auto it = jni_stubs_map_.find(JniStubKey(method));
+    if (it == jni_stubs_map_.end() || !ContainsElement(it->second.GetMethods(), method)) {
+      return nullptr;
+    }
+    const void* code_ptr = it->second.GetCode();
+    method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+    if (!method_header->Contains(pc)) {
+      return nullptr;
+    }
+  } else {
+    auto it = method_code_map_.lower_bound(reinterpret_cast<const void*>(pc));
+    if (it != method_code_map_.begin()) {
+      --it;
+      const void* code_ptr = it->first;
+      if (OatQuickMethodHeader::FromCodePointer(code_ptr)->Contains(pc)) {
+        method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+        found_method = it->second;
+      }
+    }
+    if (method_header == nullptr && method == nullptr) {
+      // Scan all compiled JNI stubs as well. This slow search is used only for
+      // checks in debug builds; in release builds `method` is never null.
+      for (auto&& entry : jni_stubs_map_) {
+        const JniStubData& data = entry.second;
+        if (data.IsCompiled() &&
+            OatQuickMethodHeader::FromCodePointer(data.GetCode())->Contains(pc)) {
+          method_header = OatQuickMethodHeader::FromCodePointer(data.GetCode());
+        }
+      }
+    }
+    if (method_header == nullptr) {
+      return nullptr;
+    }
   }
-  if (kIsDebugBuild && method != nullptr) {
+
+  if (kIsDebugBuild && method != nullptr && !method->IsNative()) {
     // When we are walking the stack to redefine classes and creating obsolete methods it is
     // possible that we might have updated the method_code_map by making this method obsolete in a
     // previous frame. Therefore we should just check that the non-obsolete version of this method
@@ -1258,9 +1483,9 @@
     // occur when we are in the process of allocating and setting up obsolete methods. Otherwise
     // method and it->second should be identical. (See openjdkjvmti/ti_redefine.cc for more
     // information.)
-    DCHECK_EQ(it->second->GetNonObsoleteMethod(), method->GetNonObsoleteMethod())
+    DCHECK_EQ(found_method->GetNonObsoleteMethod(), method->GetNonObsoleteMethod())
         << ArtMethod::PrettyMethod(method->GetNonObsoleteMethod()) << " "
-        << ArtMethod::PrettyMethod(it->second->GetNonObsoleteMethod()) << " "
+        << ArtMethod::PrettyMethod(found_method->GetNonObsoleteMethod()) << " "
         << std::hex << pc;
   }
   return method_header;
@@ -1449,21 +1674,51 @@
     return false;
   }
 
-  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
-  if (info == nullptr) {
-    VLOG(jit) << method->PrettyMethod() << " needs a ProfilingInfo to be compiled";
-    // Because the counter is not atomic, there are some rare cases where we may not hit the
-    // threshold for creating the ProfilingInfo. Reset the counter now to "correct" this.
-    ClearMethodCounter(method, /*was_warm*/ false);
-    return false;
-  }
+  if (UNLIKELY(method->IsNative())) {
+    JniStubKey key(method);
+    auto it = jni_stubs_map_.find(key);
+    bool new_compilation = false;
+    if (it == jni_stubs_map_.end()) {
+      // Create a new entry to mark the stub as being compiled.
+      it = jni_stubs_map_.Put(key, JniStubData{});
+      new_compilation = true;
+    }
+    JniStubData* data = &it->second;
+    data->AddMethod(method);
+    if (data->IsCompiled()) {
+      OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(data->GetCode());
+      const void* entrypoint = method_header->GetEntryPoint();
+      // Update also entrypoints of other methods held by the JniStubData.
+      // We could simply update the entrypoint of `method` but if the last JIT GC has
+      // changed these entrypoints to GenericJNI in preparation for a full GC, we may
+      // as well change them back as this stub shall not be collected anyway and this
+      // can avoid a few expensive GenericJNI calls.
+      instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+      for (ArtMethod* m : data->GetMethods()) {
+        instrumentation->UpdateMethodsCode(m, entrypoint);
+      }
+      if (collection_in_progress_) {
+        GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(data->GetCode()));
+      }
+    }
+    return new_compilation;
+  } else {
+    ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+    if (info == nullptr) {
+      VLOG(jit) << method->PrettyMethod() << " needs a ProfilingInfo to be compiled";
+      // Because the counter is not atomic, there are some rare cases where we may not hit the
+      // threshold for creating the ProfilingInfo. Reset the counter now to "correct" this.
+      ClearMethodCounter(method, /*was_warm*/ false);
+      return false;
+    }
 
-  if (info->IsMethodBeingCompiled(osr)) {
-    return false;
-  }
+    if (info->IsMethodBeingCompiled(osr)) {
+      return false;
+    }
 
-  info->SetIsMethodBeingCompiled(true, osr);
-  return true;
+    info->SetIsMethodBeingCompiled(true, osr);
+    return true;
+  }
 }
 
 ProfilingInfo* JitCodeCache::NotifyCompilerUse(ArtMethod* method, Thread* self) {
@@ -1485,10 +1740,23 @@
   info->DecrementInlineUse();
 }
 
-void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED, bool osr) {
-  ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
-  DCHECK(info->IsMethodBeingCompiled(osr));
-  info->SetIsMethodBeingCompiled(false, osr);
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self, bool osr) {
+  DCHECK_EQ(Thread::Current(), self);
+  MutexLock mu(self, lock_);
+  if (UNLIKELY(method->IsNative())) {
+    auto it = jni_stubs_map_.find(JniStubKey(method));
+    DCHECK(it != jni_stubs_map_.end());
+    JniStubData* data = &it->second;
+    DCHECK(ContainsElement(data->GetMethods(), method));
+    if (UNLIKELY(!data->IsCompiled())) {
+      // Failed to compile; the JNI compiler never fails, but the cache may be full.
+      jni_stubs_map_.erase(it);  // Remove the entry added in NotifyCompilationOf().
+    }  // else CommitCodeInternal() updated entrypoints of all methods in the JniStubData.
+  } else {
+    ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
+    DCHECK(info->IsMethodBeingCompiled(osr));
+    info->SetIsMethodBeingCompiled(false, osr);
+  }
 }
 
 size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) {
@@ -1498,6 +1766,7 @@
 
 void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method,
                                              const OatQuickMethodHeader* header) {
+  DCHECK(!method->IsNative());
   ProfilingInfo* profiling_info = method->GetProfilingInfo(kRuntimePointerSize);
   if ((profiling_info != nullptr) &&
       (profiling_info->GetSavedEntryPoint() == header->GetEntryPoint())) {
@@ -1553,6 +1822,7 @@
   os << "Current JIT code cache size: " << PrettySize(used_memory_for_code_) << "\n"
      << "Current JIT data cache size: " << PrettySize(used_memory_for_data_) << "\n"
      << "Current JIT capacity: " << PrettySize(current_capacity_) << "\n"
+     << "Current number of JIT JNI stub entries: " << jni_stubs_map_.size() << "\n"
      << "Current number of JIT code cache entries: " << method_code_map_.size() << "\n"
      << "Total number of JIT compilations: " << number_of_compilations_ << "\n"
      << "Total number of JIT compilations for on stack replacement: "
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 46a4085..fc011dd 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -35,9 +35,23 @@
 class LinearAlloc;
 class InlineCache;
 class IsMarkedVisitor;
+class JitJniStubTestHelper;
 class OatQuickMethodHeader;
 struct ProfileMethodInfo;
 class ProfilingInfo;
+class Thread;
+
+namespace gc {
+namespace accounting {
+template<size_t kAlignment> class MemoryRangeBitmap;
+}  // namespace accounting
+}  // namespace gc
+
+namespace mirror {
+class Class;
+class Object;
+template<class T> class ObjectArray;
+}  // namespace mirror
 
 namespace gc {
 namespace accounting {
@@ -137,6 +151,9 @@
   // Return true if the code cache contains this method.
   bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
 
+  // Return the code pointer for a JNI-compiled stub if the method is in the cache, null otherwise.
+  const void* GetJniStubCode(ArtMethod* method) REQUIRES(!lock_);
+
   // Allocate a region of data that contain `size` bytes, and potentially space
   // for storing `number_of_roots` roots. Returns null if there is no more room.
   // Return the number of bytes allocated.
@@ -160,11 +177,6 @@
     return live_bitmap_.get();
   }
 
-  // Return whether we should do a full collection given the current state of the cache.
-  bool ShouldDoFullCollection()
-      REQUIRES(lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Perform a collection on the code cache.
   void GarbageCollectCache(Thread* self)
       REQUIRES(!lock_)
@@ -296,6 +308,12 @@
       REQUIRES(!lock_)
       REQUIRES(!Locks::cha_lock_);
 
+  // Removes method from the cache. The caller must ensure that all threads
+  // are suspended and that the method is not on any thread's stack.
+  bool RemoveMethodLocked(ArtMethod* method, bool release_memory)
+      REQUIRES(lock_)
+      REQUIRES(Locks::mutator_lock_);
+
   // Free in the mspace allocations for `code_ptr`.
   void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
@@ -315,6 +333,11 @@
   // Set the footprint limit of the code cache.
   void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_);
 
+  // Return whether we should do a full collection given the current state of the cache.
+  bool ShouldDoFullCollection()
+      REQUIRES(lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void DoCollection(Thread* self, bool collect_profiling_info)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -341,6 +364,9 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  class JniStubKey;
+  class JniStubData;
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
@@ -357,6 +383,8 @@
   void* data_mspace_ GUARDED_BY(lock_);
   // Bitmap for collecting code and data.
   std::unique_ptr<CodeCacheBitmap> live_bitmap_;
+  // Holds compiled code associated with the shorty for a JNI stub.
+  SafeMap<JniStubKey, JniStubData> jni_stubs_map_ GUARDED_BY(lock_);
   // Holds compiled code associated to the ArtMethod.
   SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
   // Holds osr compiled code associated to the ArtMethod.
@@ -418,6 +446,7 @@
   // Condition to wait on for accessing inline caches.
   ConditionVariable inline_cache_cond_ GUARDED_BY(lock_);
 
+  friend class art::JitJniStubTestHelper;
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index bb8e5e5..7754777 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -35,6 +35,7 @@
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/file_utils.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
@@ -1582,7 +1583,11 @@
       for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
         MethodHotness hotness_info(dex_data->GetHotnessInfo(method_idx));
         if (startup ? hotness_info.IsStartup() : hotness_info.IsPostStartup()) {
-          os << method_idx << ", ";
+          if (dex_file != nullptr) {
+            os << "\n\t\t" << dex_file->PrettyMethod(method_idx, true);
+          } else {
+            os << method_idx << ", ";
+          }
         }
       }
       if (startup == false) {
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 01853de..ee11cfd 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -25,6 +25,7 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/scoped_arena_containers.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -357,8 +358,8 @@
             sampled_methods->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
           }
         } else {
-          CHECK_EQ(method.GetCounter(), 0u) << method.PrettyMethod()
-              << " access_flags=" << method.GetAccessFlags();
+          // We do not record native methods. Once we AOT-compile the app, all native
+          // methods shall have their thunks compiled.
         }
       }
     }
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 1e55158..48fc5f7 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -28,7 +28,7 @@
 #include "atomic.h"
 #include "base/allocator.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex.h"
 #include "base/stl_util.h"
 #include "class_linker-inl.h"
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 1ecfe7c..efeff0a 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1783,66 +1783,115 @@
     EXPECT_TRUE(vm_->SetCheckJniEnabled(old_check_jni)); \
   } while (false)
 
+#define TEST_PRIMITIVE_FIELD_FOR_CLASS(cname) \
+  do {  \
+    Thread::Current()->TransitionFromSuspendedToRunnable(); \
+    LoadDex("AllFields"); \
+    bool started = runtime_->Start(); \
+    ASSERT_TRUE(started); \
+    jclass c = env_->FindClass(cname); \
+    ASSERT_NE(c, nullptr); \
+    jobject o = env_->AllocObject(c); \
+    ASSERT_NE(o, nullptr); \
+    \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Boolean, "sZ", "Z", JNI_TRUE, JNI_FALSE); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Byte, "sB", "B", 1, 2); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Char, "sC", "C", 'a', 'b'); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_DOUBLE_EQ, Double, "sD", "D", 1.0, 2.0); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_FLOAT_EQ, Float, "sF", "F", 1.0, 2.0); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Int, "sI", "I", 1, 2); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Long, "sJ", "J", 1, 2); \
+    EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Short, "sS", "S", 1, 2); \
+    \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Boolean, "iZ", "Z", JNI_TRUE, JNI_FALSE); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Byte, "iB", "B", 1, 2); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Char, "iC", "C", 'a', 'b'); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_DOUBLE_EQ, o, Double, "iD", "D", 1.0, 2.0); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_FLOAT_EQ, o, Float, "iF", "F", 1.0, 2.0); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Int, "iI", "I", 1, 2); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Long, "iJ", "J", 1, 2); \
+    EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Short, "iS", "S", 1, 2); \
+  } while (false)
 
 TEST_F(JniInternalTest, GetPrimitiveField_SetPrimitiveField) {
+  TEST_PRIMITIVE_FIELD_FOR_CLASS("AllFields");
+}
+
+TEST_F(JniInternalTest, GetPrimitiveField_SetPrimitiveField_Subclass) {
+  TEST_PRIMITIVE_FIELD_FOR_CLASS("AllFieldsSub");
+}
+
+#define EXPECT_UNRELATED_FIELD_FAILURE(type, field_name, sig, value1) \
+  do { \
+    jfieldID fid = env_->GetStaticFieldID(c, field_name, sig); \
+    EXPECT_NE(fid, nullptr); \
+    CheckJniAbortCatcher jni_abort_catcher; \
+    env_->Get ## type ## Field(uc, fid); \
+    jni_abort_catcher.Check("not valid for an object of class"); \
+    env_->Set ## type ## Field(uc, fid, value1); \
+    jni_abort_catcher.Check("not valid for an object of class"); \
+  } while (false)
+
+TEST_F(JniInternalTest, GetField_SetField_unrelated) {
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   bool started = runtime_->Start();
   ASSERT_TRUE(started);
-
   jclass c = env_->FindClass("AllFields");
   ASSERT_NE(c, nullptr);
-  jobject o = env_->AllocObject(c);
-  ASSERT_NE(o, nullptr);
-
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Boolean, "sZ", "Z", JNI_TRUE, JNI_FALSE);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Byte, "sB", "B", 1, 2);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Char, "sC", "C", 'a', 'b');
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_DOUBLE_EQ, Double, "sD", "D", 1.0, 2.0);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_FLOAT_EQ, Float, "sF", "F", 1.0, 2.0);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Int, "sI", "I", 1, 2);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Long, "sJ", "J", 1, 2);
-  EXPECT_STATIC_PRIMITIVE_FIELD(EXPECT_EQ, Short, "sS", "S", 1, 2);
-
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Boolean, "iZ", "Z", JNI_TRUE, JNI_FALSE);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Byte, "iB", "B", 1, 2);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Char, "iC", "C", 'a', 'b');
-  EXPECT_PRIMITIVE_FIELD(EXPECT_DOUBLE_EQ, o, Double, "iD", "D", 1.0, 2.0);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_FLOAT_EQ, o, Float, "iF", "F", 1.0, 2.0);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Int, "iI", "I", 1, 2);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Long, "iJ", "J", 1, 2);
-  EXPECT_PRIMITIVE_FIELD(EXPECT_EQ, o, Short, "iS", "S", 1, 2);
+  jclass uc = env_->FindClass("AllFieldsUnrelated");
+  ASSERT_NE(uc, nullptr);
+  bool old_check_jni = vm_->SetCheckJniEnabled(true);
+  EXPECT_UNRELATED_FIELD_FAILURE(Boolean, "sZ", "Z", JNI_TRUE);
+  EXPECT_UNRELATED_FIELD_FAILURE(Byte, "sB", "B", 1);
+  EXPECT_UNRELATED_FIELD_FAILURE(Char, "sC", "C", 'a');
+  EXPECT_UNRELATED_FIELD_FAILURE(Double, "sD", "D", 1.0);
+  EXPECT_UNRELATED_FIELD_FAILURE(Float, "sF", "F", 1.0);
+  EXPECT_UNRELATED_FIELD_FAILURE(Int, "sI", "I", 1);
+  EXPECT_UNRELATED_FIELD_FAILURE(Long, "sJ", "J", 1);
+  EXPECT_UNRELATED_FIELD_FAILURE(Short, "sS", "S", 1);
+  EXPECT_UNRELATED_FIELD_FAILURE(Object, "sObject", "Ljava/lang/Object;", c);
+  EXPECT_TRUE(vm_->SetCheckJniEnabled(old_check_jni));
 }
 
+#define TEST_OBJECT_FIELD_FOR_CLASS(cname) \
+  do { \
+    Thread::Current()->TransitionFromSuspendedToRunnable(); \
+    LoadDex("AllFields"); \
+    runtime_->Start(); \
+    \
+    jclass c = env_->FindClass(cname); \
+    ASSERT_NE(c, nullptr); \
+    jobject o = env_->AllocObject(c); \
+    ASSERT_NE(o, nullptr); \
+    \
+    jstring s1 = env_->NewStringUTF("hello"); \
+    ASSERT_NE(s1, nullptr); \
+    jstring s2 = env_->NewStringUTF("world"); \
+    ASSERT_NE(s2, nullptr); \
+    \
+    jfieldID s_fid = env_->GetStaticFieldID(c, "sObject", "Ljava/lang/Object;"); \
+    ASSERT_NE(s_fid, nullptr); \
+    jfieldID i_fid = env_->GetFieldID(c, "iObject", "Ljava/lang/Object;"); \
+    ASSERT_NE(i_fid, nullptr); \
+    \
+    env_->SetStaticObjectField(c, s_fid, s1); \
+    ASSERT_TRUE(env_->IsSameObject(s1, env_->GetStaticObjectField(c, s_fid))); \
+    env_->SetStaticObjectField(c, s_fid, s2); \
+    ASSERT_TRUE(env_->IsSameObject(s2, env_->GetStaticObjectField(c, s_fid))); \
+    \
+    env_->SetObjectField(o, i_fid, s1); \
+    ASSERT_TRUE(env_->IsSameObject(s1, env_->GetObjectField(o, i_fid))); \
+    env_->SetObjectField(o, i_fid, s2); \
+    ASSERT_TRUE(env_->IsSameObject(s2, env_->GetObjectField(o, i_fid))); \
+  } while (false)
+
 TEST_F(JniInternalTest, GetObjectField_SetObjectField) {
-  Thread::Current()->TransitionFromSuspendedToRunnable();
-  LoadDex("AllFields");
-  runtime_->Start();
+  TEST_OBJECT_FIELD_FOR_CLASS("AllFields");
+}
 
-  jclass c = env_->FindClass("AllFields");
-  ASSERT_NE(c, nullptr);
-  jobject o = env_->AllocObject(c);
-  ASSERT_NE(o, nullptr);
-
-  jstring s1 = env_->NewStringUTF("hello");
-  ASSERT_NE(s1, nullptr);
-  jstring s2 = env_->NewStringUTF("world");
-  ASSERT_NE(s2, nullptr);
-
-  jfieldID s_fid = env_->GetStaticFieldID(c, "sObject", "Ljava/lang/Object;");
-  ASSERT_NE(s_fid, nullptr);
-  jfieldID i_fid = env_->GetFieldID(c, "iObject", "Ljava/lang/Object;");
-  ASSERT_NE(i_fid, nullptr);
-
-  env_->SetStaticObjectField(c, s_fid, s1);
-  ASSERT_TRUE(env_->IsSameObject(s1, env_->GetStaticObjectField(c, s_fid)));
-  env_->SetStaticObjectField(c, s_fid, s2);
-  ASSERT_TRUE(env_->IsSameObject(s2, env_->GetStaticObjectField(c, s_fid)));
-
-  env_->SetObjectField(o, i_fid, s1);
-  ASSERT_TRUE(env_->IsSameObject(s1, env_->GetObjectField(o, i_fid)));
-  env_->SetObjectField(o, i_fid, s2);
-  ASSERT_TRUE(env_->IsSameObject(s2, env_->GetObjectField(o, i_fid)));
+TEST_F(JniInternalTest, GetObjectField_SetObjectField_subclass) {
+  TEST_OBJECT_FIELD_FOR_CLASS("AllFieldsSub");
 }
 
 TEST_F(JniInternalTest, NewLocalRef_nullptr) {
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 31459af..2bfed7f 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -19,8 +19,10 @@
 
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "globals.h"
 
 namespace art {
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index b9aa0b7..fac1a75 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -20,8 +20,9 @@
 #include <cstdint>
 #include <iosfwd>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "read_barrier.h"
 
 namespace art {
diff --git a/runtime/managed_stack-inl.h b/runtime/managed_stack-inl.h
index 689dd80..678be8e 100644
--- a/runtime/managed_stack-inl.h
+++ b/runtime/managed_stack-inl.h
@@ -24,7 +24,7 @@
 namespace art {
 
 inline ShadowFrame* ManagedStack::PushShadowFrame(ShadowFrame* new_top_frame) {
-  DCHECK(top_quick_frame_ == nullptr);
+  DCHECK(!HasTopQuickFrame());
   ShadowFrame* old_frame = top_shadow_frame_;
   top_shadow_frame_ = new_top_frame;
   new_top_frame->SetLink(old_frame);
@@ -32,7 +32,7 @@
 }
 
 inline ShadowFrame* ManagedStack::PopShadowFrame() {
-  DCHECK(top_quick_frame_ == nullptr);
+  DCHECK(!HasTopQuickFrame());
   CHECK(top_shadow_frame_ != nullptr);
   ShadowFrame* frame = top_shadow_frame_;
   top_shadow_frame_ = frame->GetLink();
diff --git a/runtime/managed_stack.h b/runtime/managed_stack.h
index 4f1984d..d1c230f 100644
--- a/runtime/managed_stack.h
+++ b/runtime/managed_stack.h
@@ -21,9 +21,11 @@
 #include <cstring>
 #include <string>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "base/bit_utils.h"
 
 namespace art {
 
@@ -42,7 +44,9 @@
 class PACKED(4) ManagedStack {
  public:
   ManagedStack()
-      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
+      : tagged_top_quick_frame_(TaggedTopQuickFrame::CreateNotTagged(nullptr)),
+        link_(nullptr),
+        top_shadow_frame_(nullptr) {}
 
   void PushManagedStackFragment(ManagedStack* fragment) {
     // Copy this top fragment into given fragment.
@@ -63,17 +67,36 @@
     return link_;
   }
 
+  ArtMethod** GetTopQuickFrameKnownNotTagged() const {
+    return tagged_top_quick_frame_.GetSpKnownNotTagged();
+  }
+
   ArtMethod** GetTopQuickFrame() const {
-    return top_quick_frame_;
+    return tagged_top_quick_frame_.GetSp();
+  }
+
+  bool GetTopQuickFrameTag() const {
+    return tagged_top_quick_frame_.GetTag();
+  }
+
+  bool HasTopQuickFrame() const {
+    return tagged_top_quick_frame_.GetTaggedSp() != 0u;
   }
 
   void SetTopQuickFrame(ArtMethod** top) {
     DCHECK(top_shadow_frame_ == nullptr);
-    top_quick_frame_ = top;
+    DCHECK_ALIGNED(top, 4u);
+    tagged_top_quick_frame_ = TaggedTopQuickFrame::CreateNotTagged(top);
   }
 
-  static size_t TopQuickFrameOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
+  void SetTopQuickFrameTagged(ArtMethod** top) {
+    DCHECK(top_shadow_frame_ == nullptr);
+    DCHECK_ALIGNED(top, 4u);
+    tagged_top_quick_frame_ = TaggedTopQuickFrame::CreateTagged(top);
+  }
+
+  static size_t TaggedTopQuickFrameOffset() {
+    return OFFSETOF_MEMBER(ManagedStack, tagged_top_quick_frame_);
   }
 
   ALWAYS_INLINE ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame);
@@ -83,8 +106,12 @@
     return top_shadow_frame_;
   }
 
+  bool HasTopShadowFrame() const {
+    return GetTopShadowFrame() != nullptr;
+  }
+
   void SetTopShadowFrame(ShadowFrame* top) {
-    DCHECK(top_quick_frame_ == nullptr);
+    DCHECK_EQ(tagged_top_quick_frame_.GetTaggedSp(), 0u);
     top_shadow_frame_ = top;
   }
 
@@ -97,7 +124,47 @@
   bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
 
  private:
-  ArtMethod** top_quick_frame_;
+  // Encodes the top quick frame (which must be at least 4-byte aligned)
+  // and a flag that marks the GenericJNI trampoline.
+  class TaggedTopQuickFrame {
+   public:
+    static TaggedTopQuickFrame CreateNotTagged(ArtMethod** sp) {
+      DCHECK_ALIGNED(sp, 4u);
+      return TaggedTopQuickFrame(reinterpret_cast<uintptr_t>(sp));
+    }
+
+    static TaggedTopQuickFrame CreateTagged(ArtMethod** sp) {
+      DCHECK_ALIGNED(sp, 4u);
+      return TaggedTopQuickFrame(reinterpret_cast<uintptr_t>(sp) | 1u);
+    }
+
+    // Get the SP when it is known to be non-null and not tagged.
+    ArtMethod** GetSpKnownNotTagged() const {
+      DCHECK(!GetTag());
+      DCHECK_NE(tagged_sp_, 0u);
+      return reinterpret_cast<ArtMethod**>(tagged_sp_);
+    }
+
+    ArtMethod** GetSp() const {
+      return reinterpret_cast<ArtMethod**>(tagged_sp_ & ~static_cast<uintptr_t>(1u));
+    }
+
+    bool GetTag() const {
+      return (tagged_sp_ & 1u) != 0u;
+    }
+
+    uintptr_t GetTaggedSp() const {
+      return tagged_sp_;
+    }
+
+   private:
+    explicit TaggedTopQuickFrame(uintptr_t tagged_sp) : tagged_sp_(tagged_sp) { }
+
+    uintptr_t tagged_sp_;
+  };
+  static_assert(sizeof(TaggedTopQuickFrame) == sizeof(uintptr_t), "TaggedTopQuickFrame size check");
+
+  TaggedTopQuickFrame tagged_top_quick_frame_;
   ManagedStack* link_;
   ShadowFrame* top_shadow_frame_;
 };
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 7f68d2f..8abf8a6 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -35,6 +35,7 @@
 #include "base/allocator.h"
 #include "base/bit_utils.h"
 #include "base/file_utils.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "base/memory_tool.h"
 #include "globals.h"
 #include "utils.h"
@@ -59,14 +60,15 @@
 
 static std::ostream& operator<<(
     std::ostream& os,
-    std::pair<BacktraceMap::const_iterator, BacktraceMap::const_iterator> iters) {
-  for (BacktraceMap::const_iterator it = iters.first; it != iters.second; ++it) {
+    std::pair<BacktraceMap::iterator, BacktraceMap::iterator> iters) {
+  for (BacktraceMap::iterator it = iters.first; it != iters.second; ++it) {
+    const backtrace_map_t* entry = *it;
     os << StringPrintf("0x%08x-0x%08x %c%c%c %s\n",
-                       static_cast<uint32_t>(it->start),
-                       static_cast<uint32_t>(it->end),
-                       (it->flags & PROT_READ) ? 'r' : '-',
-                       (it->flags & PROT_WRITE) ? 'w' : '-',
-                       (it->flags & PROT_EXEC) ? 'x' : '-', it->name.c_str());
+                       static_cast<uint32_t>(entry->start),
+                       static_cast<uint32_t>(entry->end),
+                       (entry->flags & PROT_READ) ? 'r' : '-',
+                       (entry->flags & PROT_WRITE) ? 'w' : '-',
+                       (entry->flags & PROT_EXEC) ? 'x' : '-', entry->name.c_str());
   }
   return os;
 }
@@ -170,9 +172,10 @@
   }
 
   ScopedBacktraceMapIteratorLock lock(map.get());
-  for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
-    if ((begin >= it->start && begin < it->end)  // start of new within old
-        && (end > it->start && end <= it->end)) {  // end of new within old
+  for (BacktraceMap::iterator it = map->begin(); it != map->end(); ++it) {
+    const backtrace_map_t* entry = *it;
+    if ((begin >= entry->start && begin < entry->end)     // start of new within old
+        && (end > entry->start && end <= entry->end)) {   // end of new within old
       return true;
     }
   }
@@ -194,17 +197,18 @@
     return false;
   }
   ScopedBacktraceMapIteratorLock lock(map.get());
-  for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
-    if ((begin >= it->start && begin < it->end)      // start of new within old
-        || (end > it->start && end < it->end)        // end of new within old
-        || (begin <= it->start && end > it->end)) {  // start/end of new includes all of old
+  for (BacktraceMap::iterator it = map->begin(); it != map->end(); ++it) {
+    const backtrace_map_t* entry = *it;
+    if ((begin >= entry->start && begin < entry->end)      // start of new within old
+        || (end > entry->start && end < entry->end)        // end of new within old
+        || (begin <= entry->start && end > entry->end)) {  // start/end of new includes all of old
       std::ostringstream map_info;
       map_info << std::make_pair(it, map->end());
       *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " overlaps with "
                                 "existing map 0x%08" PRIxPTR "-0x%08" PRIxPTR " (%s)\n%s",
                                 begin, end,
-                                static_cast<uintptr_t>(it->start), static_cast<uintptr_t>(it->end),
-                                it->name.c_str(),
+                                static_cast<uintptr_t>(entry->start), static_cast<uintptr_t>(entry->end),
+                                entry->name.c_str(),
                                 map_info.str().c_str());
       return false;
     }
diff --git a/runtime/memory_region.cc b/runtime/memory_region.cc
index 13cc5c9..862ff73 100644
--- a/runtime/memory_region.cc
+++ b/runtime/memory_region.cc
@@ -19,9 +19,6 @@
 #include <stdint.h>
 #include <string.h>
 
-#include "base/logging.h"
-#include "globals.h"
-
 namespace art {
 
 void MemoryRegion::CopyFrom(size_t offset, const MemoryRegion& from) const {
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index 7cf5d49..23e0aec 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -20,10 +20,11 @@
 #include <stdint.h>
 #include <type_traits>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "base/value_object.h"
 #include "globals.h"
diff --git a/runtime/method_info.h b/runtime/method_info.h
index 5a72125..6485af9 100644
--- a/runtime/method_info.h
+++ b/runtime/method_info.h
@@ -17,7 +17,9 @@
 #ifndef ART_RUNTIME_METHOD_INFO_H_
 #define ART_RUNTIME_METHOD_INFO_H_
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/macros.h"
 #include "leb128.h"
 #include "memory_region.h"
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 2281245..636c84c 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -19,11 +19,11 @@
 
 #include "array.h"
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "base/bit_utils.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "class.h"
 #include "gc/heap-inl.h"
 #include "obj_ptr-inl.h"
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index eb54f7f..b4f5d81 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -440,7 +440,6 @@
     // cache. Use LookupResolveType here to search the class table if it is not in the dex cache.
     // should be no thread suspension due to the class being resolved.
     ObjPtr<Class> dex_access_to = Runtime::Current()->GetClassLinker()->LookupResolvedType(
-        *dex_cache->GetDexFile(),
         class_idx,
         dex_cache,
         access_to->GetClassLoader());
@@ -477,7 +476,6 @@
     // The referenced class has already been resolved with the method, but may not be in the dex
     // cache.
     ObjPtr<Class> dex_access_to = Runtime::Current()->GetClassLinker()->LookupResolvedType(
-        *dex_cache->GetDexFile(),
         class_idx,
         dex_cache,
         access_to->GetClassLoader());
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 892c039..e0a341d 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -20,6 +20,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/logging.h"  // For VLOG.
 #include "class-inl.h"
 #include "class_ext.h"
 #include "class_linker-inl.h"
@@ -1034,7 +1035,7 @@
     return interfaces->Get(idx);
   } else {
     dex::TypeIndex type_idx = klass->GetDirectInterfaceTypeIdx(idx);
-    ObjPtr<Class> interface = ClassLinker::LookupResolvedType(
+    ObjPtr<Class> interface = Runtime::Current()->GetClassLinker()->LookupResolvedType(
         type_idx, klass->GetDexCache(), klass->GetClassLoader());
     return interface;
   }
@@ -1046,9 +1047,7 @@
     DCHECK(!klass->IsArrayClass());
     DCHECK(!klass->IsProxyClass());
     dex::TypeIndex type_idx = klass->GetDirectInterfaceTypeIdx(idx);
-    interface = Runtime::Current()->GetClassLinker()->ResolveType(klass->GetDexFile(),
-                                                                  type_idx,
-                                                                  klass.Get());
+    interface = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, klass.Get());
     CHECK(interface != nullptr || self->IsExceptionPending());
   }
   return interface;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 8b11c12..8d4d44b 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -19,11 +19,12 @@
 
 #include "dex_cache.h"
 
+#include <android-base/logging.h>
+
 #include "art_field.h"
 #include "art_method.h"
 #include "base/casts.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "class_linker.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 2f63dff..eb4db00 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -17,7 +17,6 @@
 #include "dex_cache-inl.h"
 
 #include "art_method-inl.h"
-#include "base/logging.h"
 #include "class_linker.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index f75786b..509db02 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -19,6 +19,7 @@
 
 #include "array.h"
 #include "base/bit_utils.h"
+#include "base/mutex.h"
 #include "dex_file_types.h"
 #include "object.h"
 #include "object_array.h"
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 8198636..d2bff2c 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -150,13 +150,11 @@
   const DexFile::MethodId& method2_id = dex_file.GetMethodId(method2->GetDexMethodIndex());
   Handle<mirror::MethodType> method1_type = hs.NewHandle(
       class_linker_->ResolveMethodType(soa.Self(),
-                                       dex_file,
                                        method1_id.proto_idx_,
                                        dex_cache,
                                        class_loader));
   Handle<mirror::MethodType> method2_type = hs.NewHandle(
       class_linker_->ResolveMethodType(soa.Self(),
-                                       dex_file,
                                        method2_id.proto_idx_,
                                        dex_cache,
                                        class_loader));
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 87cc620..97fb793 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -240,7 +240,7 @@
         CHECK_NE(field.GetTypeAsPrimitiveType(), Primitive::kPrimNot);
         // TODO: resolve the field type for moving GC.
         ObjPtr<mirror::Class> field_type =
-            kMovingCollector ? field.LookupType() : field.ResolveType();
+            kMovingCollector ? field.LookupResolvedType() : field.ResolveType();
         if (field_type != nullptr) {
           CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
         }
@@ -258,7 +258,7 @@
         CHECK_NE(field.GetTypeAsPrimitiveType(), Primitive::kPrimNot);
         // TODO: resolve the field type for moving GC.
         ObjPtr<mirror::Class> field_type =
-            kMovingCollector ? field.LookupType() : field.ResolveType();
+            kMovingCollector ? field.LookupResolvedType() : field.ResolveType();
         if (field_type != nullptr) {
           CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
         }
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index cdc55bd..1252905 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -21,6 +21,7 @@
 #include "android-base/stringprintf.h"
 
 #include "art_method-inl.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -1401,26 +1402,38 @@
 
   // Ask the verifier for the dex pcs of all the monitor-enter instructions corresponding to
   // the locks held in this stack frame.
-  std::vector<uint32_t> monitor_enter_dex_pcs;
+  std::vector<verifier::MethodVerifier::DexLockInfo> monitor_enter_dex_pcs;
   verifier::MethodVerifier::FindLocksAtDexPc(m, dex_pc, &monitor_enter_dex_pcs);
-  for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) {
-    // The verifier works in terms of the dex pcs of the monitor-enter instructions.
-    // We want the registers used by those instructions (so we can read the values out of them).
-    const Instruction* monitor_enter_instruction =
-        Instruction::At(&code_item->insns_[monitor_dex_pc]);
+  for (verifier::MethodVerifier::DexLockInfo& dex_lock_info : monitor_enter_dex_pcs) {
+    // As a debug check, check that dex PC corresponds to a monitor-enter.
+    if (kIsDebugBuild) {
+      const Instruction* monitor_enter_instruction =
+          Instruction::At(&code_item->insns_[dex_lock_info.dex_pc]);
+      CHECK_EQ(monitor_enter_instruction->Opcode(), Instruction::MONITOR_ENTER)
+          << "expected monitor-enter @" << dex_lock_info.dex_pc << "; was "
+          << reinterpret_cast<const void*>(monitor_enter_instruction);
+    }
 
-    // Quick sanity check.
-    CHECK_EQ(monitor_enter_instruction->Opcode(), Instruction::MONITOR_ENTER)
-      << "expected monitor-enter @" << monitor_dex_pc << "; was "
-      << reinterpret_cast<const void*>(monitor_enter_instruction);
-
-    uint16_t monitor_register = monitor_enter_instruction->VRegA();
-    uint32_t value;
-    bool success = stack_visitor->GetVReg(m, monitor_register, kReferenceVReg, &value);
-    CHECK(success) << "Failed to read v" << monitor_register << " of kind "
-                   << kReferenceVReg << " in method " << m->PrettyMethod();
-    mirror::Object* o = reinterpret_cast<mirror::Object*>(value);
-    callback(o, callback_context);
+    // Iterate through the set of dex registers, as the compiler may not have held all of them
+    // live.
+    bool success = false;
+    for (uint32_t dex_reg : dex_lock_info.dex_registers) {
+      uint32_t value;
+      success = stack_visitor->GetVReg(m, dex_reg, kReferenceVReg, &value);
+      if (success) {
+        mirror::Object* o = reinterpret_cast<mirror::Object*>(value);
+        callback(o, callback_context);
+        break;
+      }
+    }
+    DCHECK(success) << "Failed to find/read reference for monitor-enter at dex pc "
+                    << dex_lock_info.dex_pc
+                    << " in method "
+                    << m->PrettyMethod();
+    if (!success) {
+      LOG(WARNING) << "Had a lock reported for dex pc " << dex_lock_info.dex_pc
+                   << " but was not able to fetch a corresponding object!";
+    }
   }
 }
 
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index d00f979..cf5934b 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -16,7 +16,7 @@
 
 #include "monitor_pool.h"
 
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex-inl.h"
 #include "monitor.h"
 #include "thread-current-inl.h"
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 2235563..c0de374 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -744,6 +744,23 @@
   return result;
 }
 
+static jlong DexFile_getStaticSizeOfDexFile(JNIEnv* env, jclass, jobject cookie) {
+  const OatFile* oat_file = nullptr;  // Out-param of the conversion; unused afterwards.
+  std::vector<const DexFile*> dex_files;
+  if (!ConvertJavaArrayToDexFiles(env, cookie, /* out */ dex_files, /* out */ oat_file)) {
+    DCHECK(env->ExceptionCheck());  // Failure is expected to leave an exception pending.
+    return 0;
+  }
+
+  uint64_t file_size = 0;  // Running total of header file_size_ over the non-null dex files.
+  for (auto& dex_file : dex_files) {
+    if (dex_file) {  // Null entries are skipped.
+      file_size += dex_file->GetHeader().file_size_;
+    }
+  }
+  return static_cast<jlong>(file_size);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile,
@@ -779,7 +796,8 @@
   NATIVE_METHOD(DexFile, getDexFileStatus,
                 "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, getDexFileOutputPaths,
-                "(Ljava/lang/String;Ljava/lang/String;)[Ljava/lang/String;")
+                "(Ljava/lang/String;Ljava/lang/String;)[Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, getStaticSizeOfDexFile, "(Ljava/lang/Object;)J")
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 2663bea..88a78ab 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -319,6 +319,53 @@
   return soa.AddLocalReference<jlongArray>(long_counts);
 }
 
+static jobjectArray VMDebug_getInstancesOfClasses(JNIEnv* env,
+                                                  jclass,
+                                                  jobjectArray javaClasses,
+                                                  jboolean includeAssignable) {
+  ScopedObjectAccess soa(env);
+  StackHandleScope<2> hs(soa.Self());  // Holds `classes` and the reusable `h_class`.
+  Handle<mirror::ObjectArray<mirror::Class>> classes = hs.NewHandle(
+      soa.Decode<mirror::ObjectArray<mirror::Class>>(javaClasses));
+  if (classes == nullptr) {
+    return nullptr;  // Null input array: return null without raising anything here.
+  }
+
+  jclass object_array_class = env->FindClass("[Ljava/lang/Object;");
+  if (env->ExceptionCheck() == JNI_TRUE) {
+    return nullptr;  // Class lookup failed; the exception stays pending.
+  }
+  CHECK(object_array_class != nullptr);
+
+  size_t num_classes = classes->GetLength();  // One result slot per requested class.
+  jobjectArray result = env->NewObjectArray(num_classes, object_array_class, nullptr);
+  if (env->ExceptionCheck() == JNI_TRUE) {
+    return nullptr;
+  }
+
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  MutableHandle<mirror::Class> h_class(hs.NewHandle<mirror::Class>(nullptr));
+  for (size_t i = 0; i < num_classes; ++i) {
+    h_class.Assign(classes->Get(i));  // Reuse a single handle slot for every class.
+
+    VariableSizedHandleScope hs2(soa.Self());  // Presumably backs the handles in raw_instances.
+    std::vector<Handle<mirror::Object>> raw_instances;
+    heap->GetInstances(hs2, h_class, includeAssignable, /* max_count */ 0, raw_instances);
+    jobjectArray array = env->NewObjectArray(raw_instances.size(),
+                                             WellKnownClasses::java_lang_Object,
+                                             nullptr);
+    if (env->ExceptionCheck() == JNI_TRUE) {
+      return nullptr;  // Bail out with the exception pending; `result` is not returned.
+    }
+
+    for (size_t j = 0; j < raw_instances.size(); ++j) {
+      env->SetObjectArrayElement(array, j, raw_instances[j].ToJObject());
+    }
+    env->SetObjectArrayElement(result, i, array);  // result[i] = instances for classes[i].
+  }
+  return result;
+}
+
 // We export the VM internal per-heap-space size/alloc/free metrics
 // for the zygote space, alloc space (application heap), and the large
 // object space for dumpsys meminfo. The other memory region data such
@@ -534,6 +581,7 @@
   NATIVE_METHOD(VMDebug, dumpReferenceTables, "()V"),
   NATIVE_METHOD(VMDebug, getAllocCount, "(I)I"),
   NATIVE_METHOD(VMDebug, getHeapSpaceStats, "([J)V"),
+  NATIVE_METHOD(VMDebug, getInstancesOfClasses, "([Ljava/lang/Class;Z)[[Ljava/lang/Object;"),
   NATIVE_METHOD(VMDebug, getInstructionCount, "([I)V"),
   FAST_NATIVE_METHOD(VMDebug, getLoadedClassCount, "()I"),
   NATIVE_METHOD(VMDebug, getVmFeatureList, "()[Ljava/lang/String;"),
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 2d1f886..1b5c535 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -375,8 +375,8 @@
   }
   const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::FieldId& field_id = dex_file->GetFieldId(field_idx);
-  ObjPtr<mirror::Class> klass =
-      ClassLinker::LookupResolvedType(field_id.class_idx_, dex_cache, nullptr);
+  ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
+      field_id.class_idx_, dex_cache, /* class_loader */ nullptr);
   if (klass == nullptr) {
     return;
   }
@@ -401,8 +401,8 @@
   }
   const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(method_idx);
-  ObjPtr<mirror::Class> klass =
-      ClassLinker::LookupResolvedType(method_id.class_idx_, dex_cache, nullptr);
+  ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
+      method_id.class_idx_, dex_cache, /* class_loader */ nullptr);
   if (klass == nullptr) {
     return;
   }
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index a7bee39..fd80aae 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,11 +18,14 @@
 
 #include <stdlib.h>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "arch/instruction_set.h"
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "base/runtime_debug.h"
 #include "debugger.h"
 #include "java_vm_ext.h"
 #include "jit/jit.h"
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 9359ffc..da5cee1 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -146,11 +146,11 @@
   // with kActiveTransaction == false.
   DCHECK(!Runtime::Current()->IsActiveTransaction());
 
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
   MutableHandle<mirror::Class> interface(hs.NewHandle<mirror::Class>(nullptr));
   for (uint32_t i = 0; i < num_ifaces; ++i) {
     const dex::TypeIndex type_idx = iface_list->GetTypeItem(i).type_idx_;
-    interface.Assign(ClassLinker::LookupResolvedType(
-        type_idx, klass->GetDexCache(), klass->GetClassLoader()));
+    interface.Assign(linker->LookupResolvedType(type_idx, klass.Get()));
     ifaces->SetWithoutChecks<false>(i, interface.Get());
   }
 
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
index c79f51b..8f8fd71 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
@@ -16,8 +16,9 @@
 
 #include "org_apache_harmony_dalvik_ddmc_DdmServer.h"
 
+#include <android-base/logging.h>
+
 #include "base/array_ref.h"
-#include "base/logging.h"
 #include "debugger.h"
 #include "jni_internal.h"
 #include "native_util.h"
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index f5057b0..7b73382 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -16,8 +16,9 @@
 
 #include "org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 
+#include <android-base/logging.h>
+
 #include "base/file_utils.h"
-#include "base/logging.h"
 #include "base/mutex.h"
 #include "debugger.h"
 #include "gc/heap.h"
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index cd8315c..10d1091 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -22,7 +22,7 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/macros.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index f166714..ec94552 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -40,6 +40,7 @@
 #include "android-base/stringprintf.h"
 
 #include "arch/instruction_set.h"
+#include "base/aborting.h"
 #include "base/file_utils.h"
 #include "base/memory_tool.h"
 #include "base/mutex.h"
diff --git a/runtime/non_debuggable_classes.cc b/runtime/non_debuggable_classes.cc
index 7db199c..8484e2c 100644
--- a/runtime/non_debuggable_classes.cc
+++ b/runtime/non_debuggable_classes.cc
@@ -16,7 +16,6 @@
 
 #include "non_debuggable_classes.h"
 
-#include "base/logging.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "nativehelper/scoped_local_ref.h"
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index c82df71..32f8df7 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -39,6 +39,7 @@
 #include "base/bit_vector.h"
 #include "base/enums.h"
 #include "base/file_utils.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
@@ -314,7 +315,7 @@
   if (requested_base != nullptr && begin_ != requested_base) {
     // Host can fail this check. Do not dump there to avoid polluting the output.
     if (kIsTargetBuild && (kIsDebugBuild || VLOG_IS_ON(oat))) {
-      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", android::base::LogSeverity::WARNING);
     }
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
         "oatdata=%p != expected=%p. See process maps in the log.",
@@ -1068,7 +1069,7 @@
     dl_iterate_context context0 = { Begin(), &dlopen_mmaps_, 0, 0};
     if (dl_iterate_phdr(dl_iterate_context::callback, &context0) == 0) {
       // OK, give up and print an error.
-      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", android::base::LogSeverity::WARNING);
       LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but cannot find its mmaps.";
     }
   }
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index cd18ce1..8707e73 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -24,7 +24,7 @@
 #include "android-base/strings.h"
 
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/stl_util.h"
 #include "class_linker.h"
 #include "compiler_filter.h"
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 3071348..91a138a 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -26,7 +26,7 @@
 #include "art_field-inl.h"
 #include "base/bit_vector-inl.h"
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "class_linker.h"
@@ -445,8 +445,13 @@
     // if it's in the class path). Note this trades correctness for performance
     // since the resulting slow down is unacceptable in some cases until b/64530081
     // is fixed.
+    // We still pass the class loader context when the classpath string of the runtime
+    // is not empty, which is the situation when ART is invoked standalone.
+    ClassLoaderContext* actual_context = Runtime::Current()->GetClassPathString().empty()
+        ? nullptr
+        : context.get();
     switch (oat_file_assistant.MakeUpToDate(/*profile_changed*/ false,
-                                            /*class_loader_context*/ nullptr,
+                                            actual_context,
                                             /*out*/ &error_msg)) {
       case OatFileAssistant::kUpdateFailed:
         LOG(WARNING) << error_msg;
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index a463f70..1b3e000 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -23,7 +23,8 @@
 #include <cstddef>
 #include <memory>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/unix_file/fd_file.h"
 
 namespace art {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 85af560..a3c0036 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -18,8 +18,10 @@
 
 #include <sstream>
 
+#include <android-base/logging.h>
+
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/macros.h"
 #include "base/stringpiece.h"
 #include "debugger.h"
 #include "gc/heap.h"
diff --git a/runtime/plugin.cc b/runtime/plugin.cc
index 6aa0787..7d86f1d 100644
--- a/runtime/plugin.cc
+++ b/runtime/plugin.cc
@@ -20,8 +20,6 @@
 
 #include "android-base/stringprintf.h"
 
-#include "base/logging.h"
-
 namespace art {
 
 using android::base::StringPrintf;
diff --git a/runtime/plugin.h b/runtime/plugin.h
index f077aaf..909c710 100644
--- a/runtime/plugin.h
+++ b/runtime/plugin.h
@@ -18,7 +18,8 @@
 #define ART_RUNTIME_PLUGIN_H_
 
 #include <string>
-#include "base/logging.h"
+
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/primitive.h b/runtime/primitive.h
index a429914..5b163d8 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -19,7 +19,8 @@
 
 #include <sys/types.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 
 namespace art {
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index f94923e..a7771ab 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -19,6 +19,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/logging.h"  // For VLOG_IS_ON.
 #include "dex_file_types.h"
 #include "dex_instruction.h"
 #include "entrypoints/entrypoint_utils.h"
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index 12b63c9..1103dab 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -17,7 +17,8 @@
 #ifndef ART_RUNTIME_QUICK_EXCEPTION_HANDLER_H_
 #define ART_RUNTIME_QUICK_EXCEPTION_HANDLER_H_
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "deoptimization_kind.h"
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index d4b9f43..e8df2ad 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -17,9 +17,11 @@
 #ifndef ART_RUNTIME_READ_BARRIER_H_
 #define ART_RUNTIME_READ_BARRIER_H_
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "base/runtime_debug.h"
 #include "gc_root.h"
 #include "jni.h"
 #include "mirror/object_reference.h"
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index f09b6c9..1cdeb7c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -57,6 +57,7 @@
 #include "asm_support.h"
 #include "asm_support_check.h"
 #include "atomic.h"
+#include "base/aborting.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
 #include "base/enums.h"
@@ -256,6 +257,7 @@
       force_native_bridge_(false),
       is_native_bridge_loaded_(false),
       is_native_debuggable_(false),
+      async_exceptions_thrown_(false),
       is_java_debuggable_(false),
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
@@ -295,6 +297,7 @@
   }
 
   if (dump_gc_performance_on_shutdown_) {
+    ScopedLogSeverity sls(LogSeverity::INFO);
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
     // to be still alive.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6b01cc2..476b71f 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -586,6 +586,14 @@
     is_native_debuggable_ = value;
   }
 
+  bool AreAsyncExceptionsThrown() const {
+    return async_exceptions_thrown_;  // Plain read of the bool member flag.
+  }
+
+  void SetAsyncExceptionsThrown() {
+    async_exceptions_thrown_ = true;  // One-way: this setter never resets the flag.
+  }
+
   // Returns the build fingerprint, if set. Otherwise an empty string is returned.
   std::string GetFingerprint() {
     return fingerprint_;
@@ -899,6 +907,10 @@
   // Whether we are running under native debugger.
   bool is_native_debuggable_;
 
+  // Whether or not any async exceptions have ever been thrown. This is used to speed up the
+  // MterpShouldSwitchInterpreters function.
+  bool async_exceptions_thrown_;
+
   // Whether Java code needs to be debuggable.
   bool is_java_debuggable_;
 
diff --git a/runtime/runtime_common.cc b/runtime/runtime_common.cc
index eb69d91..59af918 100644
--- a/runtime/runtime_common.cc
+++ b/runtime/runtime_common.cc
@@ -23,10 +23,12 @@
 #include <sstream>
 #include <string>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
+#include "base/aborting.h"
 #include "base/file_utils.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For LogHelper, GetCmdLine.
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "native_stack_dump.h"
@@ -430,7 +432,7 @@
     logger(LOG_STREAM(FATAL_WITHOUT_ABORT));
   }
   if (kIsDebugBuild && signal_number == SIGSEGV) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
+    PrintFileToLog("/proc/self/maps", android::base::LogSeverity::FATAL_WITHOUT_ABORT);
   }
 
   Runtime* runtime = Runtime::Current();
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index f298691..33e45bd 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -21,8 +21,9 @@
 #include <memory>
 #include <type_traits>
 
+#include <android-base/logging.h>
+
 #include "base/allocator.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index aa96871..a9702a7 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -19,6 +19,8 @@
 
 #include "scoped_thread_state_change.h"
 
+#include <android-base/logging.h>
+
 #include "base/casts.h"
 #include "jni_env_ext-inl.h"
 #include "obj_ptr-inl.h"
diff --git a/runtime/scoped_thread_state_change.cc b/runtime/scoped_thread_state_change.cc
index 94354fc..6a86cc6 100644
--- a/runtime/scoped_thread_state_change.cc
+++ b/runtime/scoped_thread_state_change.cc
@@ -19,7 +19,6 @@
 #include <type_traits>
 
 #include "base/casts.h"
-#include "base/logging.h"
 #include "java_vm_ext.h"
 #include "obj_ptr-inl.h"
 #include "runtime-inl.h"
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index bf5d718..d9c4da9 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -35,6 +35,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/file_utils.h"
+#include "base/logging.h"  // For GetCmdLine.
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
diff --git a/runtime/signal_set.h b/runtime/signal_set.h
index 6f88852..5361323 100644
--- a/runtime/signal_set.h
+++ b/runtime/signal_set.h
@@ -19,7 +19,7 @@
 
 #include <signal.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index ab9fb0d..5ad1f7c 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -735,12 +735,19 @@
     return runtime->GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
   }
 
-  // The only remaining case is if the method is native and uses the generic JNI stub.
+  // The only remaining case is if the method is native and uses the generic JNI stub,
+  // called either directly or through some (resolution, instrumentation) trampoline.
   DCHECK(method->IsNative());
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method,
-                                                                           kRuntimePointerSize);
-  DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << method->PrettyMethod();
+  if (kIsDebugBuild) {
+    ClassLinker* class_linker = runtime->GetClassLinker();
+    const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method,
+                                                                             kRuntimePointerSize);
+    CHECK(class_linker->IsQuickGenericJniStub(entry_point) ||
+          // The current entrypoint (after filtering out trampolines) may have changed
+          // from GenericJNI to JIT-compiled stub since we have entered this frame.
+          (runtime->GetJit() != nullptr &&
+           runtime->GetJit()->GetCodeCache()->ContainsPc(entry_point))) << method->PrettyMethod();
+  }
   // Generic JNI frame.
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
@@ -776,8 +783,48 @@
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == nullptr);
       ArtMethod* method = *cur_quick_frame_;
+      DCHECK(method != nullptr);
+      bool header_retrieved = false;
+      if (method->IsNative()) {
+        // We do not have a PC for the first frame, so we cannot simply use
+        // ArtMethod::GetOatQuickMethodHeader() as we're unable to distinguish there
+        // between GenericJNI frame and JIT-compiled JNI stub; the entrypoint may have
+        // changed since the frame was entered. The top quick frame tag indicates
+        // GenericJNI here, otherwise it's either AOT-compiled or JIT-compiled JNI stub.
+        if (UNLIKELY(current_fragment->GetTopQuickFrameTag())) {
+          // The generic JNI does not have any method header.
+          cur_oat_quick_method_header_ = nullptr;
+        } else {
+          const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
+          CHECK(existing_entry_point != nullptr);
+          Runtime* runtime = Runtime::Current();
+          ClassLinker* class_linker = runtime->GetClassLinker();
+          // Check whether we can quickly get the header from the current entrypoint.
+          if (!class_linker->IsQuickGenericJniStub(existing_entry_point) &&
+              !class_linker->IsQuickResolutionStub(existing_entry_point) &&
+              existing_entry_point != GetQuickInstrumentationEntryPoint()) {
+            cur_oat_quick_method_header_ =
+                OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+          } else {
+            const void* code = method->GetOatMethodQuickCode(class_linker->GetImagePointerSize());
+            if (code != nullptr) {
+              cur_oat_quick_method_header_ = OatQuickMethodHeader::FromEntryPoint(code);
+            } else {
+              // This must be a JITted JNI stub frame.
+              CHECK(runtime->GetJit() != nullptr);
+              code = runtime->GetJit()->GetCodeCache()->GetJniStubCode(method);
+              CHECK(code != nullptr) << method->PrettyMethod();
+              cur_oat_quick_method_header_ = OatQuickMethodHeader::FromCodePointer(code);
+            }
+          }
+        }
+        header_retrieved = true;
+      }
       while (method != nullptr) {
-        cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
+        if (!header_retrieved) {
+          cur_oat_quick_method_header_ = method->GetOatQuickMethodHeader(cur_quick_frame_pc_);
+        }
+        header_retrieved = false;  // Force header retrieval in next iteration.
         SanityCheckFrame();
 
         if ((walk_kind_ == StackWalkKind::kIncludeInlinedFrames)
diff --git a/runtime/stack.h b/runtime/stack.h
index bd6204f..a16930b 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -140,8 +140,7 @@
   };
 
   template <CountTransitions kCount = CountTransitions::kYes>
-  void WalkStack(bool include_transitions = false)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  void WalkStack(bool include_transitions = false) REQUIRES_SHARED(Locks::mutator_lock_);
 
   Thread* GetThread() const {
     return thread_;
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index 0560c33..511c2c6 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -19,7 +19,7 @@
 
 #include <iterator>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
index d0ab4e4..24a4253 100644
--- a/runtime/string_reference.h
+++ b/runtime/string_reference.h
@@ -19,7 +19,8 @@
 
 #include <stdint.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "dex_file-inl.h"
 #include "dex_file_reference.h"
 #include "dex_file_types.h"
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index b5a9626..62b0789 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,6 +19,7 @@
 
 #include "thread.h"
 
+#include "base/aborting.h"
 #include "base/casts.h"
 #include "base/mutex-inl.h"
 #include "base/time_utils.h"
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 712eabc..cb350ed 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1884,9 +1884,7 @@
   }
 
   // Threads with no managed stack frames should be shown.
-  const ManagedStack* managed_stack = thread->GetManagedStack();
-  if (managed_stack == nullptr || (managed_stack->GetTopQuickFrame() == nullptr &&
-      managed_stack->GetTopShadowFrame() == nullptr)) {
+  if (!thread->HasManagedStack()) {
     return true;
   }
 
@@ -3702,6 +3700,7 @@
 
 void Thread::SetAsyncException(ObjPtr<mirror::Throwable> new_exception) {
   CHECK(new_exception != nullptr);
+  Runtime::Current()->SetAsyncExceptionsThrown();
   if (kIsDebugBuild) {
     // Make sure we are in a checkpoint.
     MutexLock mu(Thread::Current(), *Locks::thread_suspend_count_lock_);
diff --git a/runtime/thread.h b/runtime/thread.h
index 39be66d..0803975 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -474,13 +474,16 @@
     tlsPtr_.managed_stack.SetTopQuickFrame(top_method);
   }
 
+  void SetTopOfStackTagged(ArtMethod** top_method) {
+    tlsPtr_.managed_stack.SetTopQuickFrameTagged(top_method);
+  }
+
   void SetTopOfShadowStack(ShadowFrame* top) {
     tlsPtr_.managed_stack.SetTopShadowFrame(top);
   }
 
   bool HasManagedStack() const {
-    return (tlsPtr_.managed_stack.GetTopQuickFrame() != nullptr) ||
-        (tlsPtr_.managed_stack.GetTopShadowFrame() != nullptr);
+    return tlsPtr_.managed_stack.HasTopQuickFrame() || tlsPtr_.managed_stack.HasTopShadowFrame();
   }
 
   // If 'msg' is null, no detail message is set.
@@ -833,7 +836,7 @@
   static ThreadOffset<pointer_size> TopOfManagedStackOffset() {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, managed_stack) +
-        ManagedStack::TopQuickFrameOffset());
+        ManagedStack::TaggedTopQuickFrameOffset());
   }
 
   const ManagedStack* GetManagedStack() const {
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index b922d94..9673eee 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -14,9 +14,11 @@
  * limitations under the License.
  */
 
+#include "thread.h"
+
 #include <signal.h>
 
-#include "thread.h"
+#include "base/logging.h"  // For VLOG.
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 9f55314..e43b9f4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -28,6 +28,7 @@
 #include "nativehelper/scoped_local_ref.h"
 #include "nativehelper/scoped_utf_chars.h"
 
+#include "base/aborting.h"
 #include "base/histogram-inl.h"
 #include "base/mutex-inl.h"
 #include "base/systrace.h"
@@ -364,11 +365,11 @@
   // Run the checkpoint on the suspended threads.
   for (const auto& thread : suspended_count_modified_threads) {
     if (!thread->IsSuspended()) {
-      if (ATRACE_ENABLED()) {
+      ScopedTrace trace([&]() {
         std::ostringstream oss;
         thread->ShortDump(oss);
-        ATRACE_BEGIN((std::string("Waiting for suspension of thread ") + oss.str()).c_str());
-      }
+        return std::string("Waiting for suspension of thread ") + oss.str();
+      });
       // Busy wait until the thread is suspended.
       const uint64_t start_time = NanoTime();
       do {
@@ -377,7 +378,6 @@
       const uint64_t total_delay = NanoTime() - start_time;
       // Shouldn't need to wait for longer than 1000 microseconds.
       constexpr uint64_t kLongWaitThreshold = MsToNs(1);
-      ATRACE_END();
       if (UNLIKELY(total_delay > kLongWaitThreshold)) {
         LOG(WARNING) << "Long wait of " << PrettyDuration(total_delay) << " for "
             << *thread << " suspension!";
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index cffaffc..386cdf0 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -22,11 +22,11 @@
 
 #include <pthread.h>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
 #include "base/bit_utils.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/time_utils.h"
 #include "runtime.h"
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index e923aff..c9766bc 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -16,7 +16,8 @@
 
 #include "transaction.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/stl_util.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc_root-inl.h"
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index e52dd08..304017e 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -493,7 +493,7 @@
   dex::StringIndex string_idx = dex_file->GetIndexForStringId(*string_id);
   ASSERT_TRUE(string_idx.IsValid());
   // String should only get resolved by the initializer.
-  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get()) == nullptr);
+  EXPECT_TRUE(class_linker_->LookupString(string_idx, h_dex_cache.Get()) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
   // Do the transaction, then roll back.
   Runtime::Current()->EnterTransactionMode();
@@ -502,14 +502,15 @@
   ASSERT_TRUE(h_klass->IsInitialized());
   // Make sure the string got resolved by the transaction.
   {
-    mirror::String* s = class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get());
+    ObjPtr<mirror::String> s =
+        class_linker_->LookupString(string_idx, h_dex_cache.Get());
     ASSERT_TRUE(s != nullptr);
     EXPECT_STREQ(s->ToModifiedUtf8().c_str(), kResolvedString);
-    EXPECT_EQ(s, h_dex_cache->GetResolvedString(string_idx));
+    EXPECT_EQ(s.Ptr(), h_dex_cache->GetResolvedString(string_idx));
   }
   Runtime::Current()->RollbackAndExitTransactionMode();
   // Check that the string did not stay resolved.
-  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get()) == nullptr);
+  EXPECT_TRUE(class_linker_->LookupString(string_idx, h_dex_cache.Get()) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
   ASSERT_FALSE(h_klass->IsInitialized());
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
diff --git a/runtime/type_reference.h b/runtime/type_reference.h
index f7daa2b..10a67b1 100644
--- a/runtime/type_reference.h
+++ b/runtime/type_reference.h
@@ -19,7 +19,8 @@
 
 #include <stdint.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "dex_file_types.h"
 #include "string_reference.h"
 
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 7e06482..93fcb32 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -16,7 +16,8 @@
 
 #include "utf.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
 #include "utf-inl.h"
diff --git a/runtime/utils.h b/runtime/utils.h
index ede32dc..789498c 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -23,9 +23,10 @@
 #include <random>
 #include <string>
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
 #include "base/casts.h"
-#include "base/logging.h"
 #include "base/stringpiece.h"
 #include "globals.h"
 #include "primitive.h"
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 9d4e9fb..855b856 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -19,8 +19,9 @@
 
 #include "dex_cache_arrays_layout.h"
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "gc_root.h"
 #include "globals.h"
 #include "mirror/dex_cache.h"
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index fb9d24f..fe768a1 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -20,8 +20,9 @@
 
 #include <memory>
 
+#include <android-base/logging.h>
+
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file.h"
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 3e08826..2d9fcab 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -68,6 +68,18 @@
     uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; }
     uint32_t GetNumberOfDexFiles() const { return number_of_dex_files_; }
 
+    size_t GetComputedFileSize() const {
+      return sizeof(Header) +
+             GetSizeOfChecksumsSection() +
+             GetDexSize() +
+             GetVerifierDepsSize() +
+             GetQuickeningInfoSize();
+    }
+
+    size_t GetSizeOfChecksumsSection() const {
+      return sizeof(VdexChecksum) * GetNumberOfDexFiles();
+    }
+
     static constexpr uint8_t kVdexInvalidMagic[] = { 'w', 'd', 'e', 'x' };
 
    private:
@@ -172,17 +184,13 @@
   }
 
   const uint8_t* DexBegin() const {
-    return Begin() + sizeof(Header) + GetSizeOfChecksumsSection();
+    return Begin() + sizeof(Header) + GetHeader().GetSizeOfChecksumsSection();
   }
 
   const uint8_t* DexEnd() const {
     return DexBegin() + GetHeader().GetDexSize();
   }
 
-  size_t GetSizeOfChecksumsSection() const {
-    return sizeof(VdexChecksum) * GetHeader().GetNumberOfDexFiles();
-  }
-
   uint32_t GetDexFileIndex(const DexFile& dex_file) const;
 
   std::unique_ptr<MemMap> mmap_;
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index a7fa9f3..445a6ff 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -19,7 +19,8 @@
 
 #include "method_verifier.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "handle_scope-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index a75157d..4ff49ed 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -22,8 +22,9 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/aborting.h"
 #include "base/enums.h"
-#include "base/logging.h"
+#include "base/logging.h"  // For VLOG.
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -61,8 +62,6 @@
 using android::base::StringPrintf;
 
 static constexpr bool kTimeVerifyMethod = !kIsDebugBuild;
-static constexpr bool kDebugVerify = false;
-// TODO: Add a constant to method_verifier to turn on verbose logging?
 
 // On VLOG(verifier), should we dump the whole state when we run into a hard failure?
 static constexpr bool kDumpRegLinesOnHardFailureIfVLOG = true;
@@ -231,7 +230,7 @@
     previous_method_idx = method_idx;
     InvokeType type = it->GetMethodInvokeType(class_def);
     ArtMethod* method = linker->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
-        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
+        method_idx, dex_cache, class_loader, /* referrer */ nullptr, type);
     if (method == nullptr) {
       DCHECK(self->IsExceptionPending());
       // We couldn't resolve the method, but continue regardless.
@@ -284,7 +283,7 @@
                                         bool allow_soft_failures,
                                         HardFailLogMode log_level,
                                         std::string* error) {
-  ScopedTrace trace(__FUNCTION__);
+  SCOPED_TRACE << "VerifyClass " << PrettyDescriptor(dex_file->GetClassDescriptor(class_def));
 
   // A class must not be abstract and final.
   if ((class_def.access_flags_ & (kAccAbstract | kAccFinal)) == (kAccAbstract | kAccFinal)) {
@@ -408,6 +407,10 @@
         verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
                                                     << dex_file->PrettyMethod(method_idx) << "\n");
       }
+      if (VLOG_IS_ON(verifier_debug)) {
+        std::cout << "\n" << verifier.info_messages_.str();
+        verifier.Dump(std::cout);
+      }
       result.kind = FailureKind::kSoftFailure;
       if (method != nullptr &&
           !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
@@ -481,7 +484,7 @@
         callbacks->ClassRejected(ref);
       }
     }
-    if (VLOG_IS_ON(verifier)) {
+    if (VLOG_IS_ON(verifier) || VLOG_IS_ON(verifier_debug)) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
@@ -592,8 +595,10 @@
   STLDeleteElements(&failure_messages_);
 }
 
-void MethodVerifier::FindLocksAtDexPc(ArtMethod* m, uint32_t dex_pc,
-                                      std::vector<uint32_t>* monitor_enter_dex_pcs) {
+void MethodVerifier::FindLocksAtDexPc(
+    ArtMethod* m,
+    uint32_t dex_pc,
+    std::vector<MethodVerifier::DexLockInfo>* monitor_enter_dex_pcs) {
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(m->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(m->GetClassLoader()));
@@ -1072,9 +1077,8 @@
       // Ensure exception types are resolved so that they don't need resolution to be delivered,
       // unresolved exception types will be ignored by exception delivery
       if (iterator.GetHandlerTypeIndex().IsValid()) {
-        mirror::Class* exception_type = linker->ResolveType(*dex_file_,
-                                                            iterator.GetHandlerTypeIndex(),
-                                                            dex_cache_, class_loader_);
+        ObjPtr<mirror::Class> exception_type =
+            linker->ResolveType(iterator.GetHandlerTypeIndex(), dex_cache_, class_loader_);
         if (exception_type == nullptr) {
           DCHECK(self_->IsExceptionPending());
           self_->ClearException();
@@ -1933,7 +1937,7 @@
     GetInstructionFlags(insn_idx).ClearChanged();
   }
 
-  if (kDebugVerify) {
+  if (UNLIKELY(VLOG_IS_ON(verifier_debug))) {
     /*
      * Scan for dead code. There's nothing "evil" about dead code
      * (besides the wasted space), but it indicates a flaw somewhere
@@ -2036,8 +2040,20 @@
   // for a thread to be suspended).
   if (monitor_enter_dex_pcs_ != nullptr && work_insn_idx_ == interesting_dex_pc_) {
     monitor_enter_dex_pcs_->clear();  // The new work line is more accurate than the previous one.
-    for (size_t i = 0; i < work_line_->GetMonitorEnterCount(); ++i) {
-      monitor_enter_dex_pcs_->push_back(work_line_->GetMonitorEnterDexPc(i));
+
+    std::map<uint32_t, DexLockInfo> depth_to_lock_info;
+    auto collector = [&](uint32_t dex_reg, uint32_t depth) {
+      auto insert_pair = depth_to_lock_info.emplace(depth, DexLockInfo(depth));
+      auto it = insert_pair.first;
+      auto set_insert_pair = it->second.dex_registers.insert(dex_reg);
+      DCHECK(set_insert_pair.second);
+    };
+    work_line_->IterateRegToLockDepths(collector);
+    for (auto& pair : depth_to_lock_info) {
+      monitor_enter_dex_pcs_->push_back(pair.second);
+      // Map depth to dex PC.
+      (*monitor_enter_dex_pcs_)[monitor_enter_dex_pcs_->size() - 1].dex_pc =
+          work_line_->GetMonitorEnterDexPc(pair.second.dex_pc);
     }
   }
 
@@ -2065,7 +2081,7 @@
 
   int32_t branch_target = 0;
   bool just_set_result = false;
-  if (kDebugVerify) {
+  if (UNLIKELY(VLOG_IS_ON(verifier_debug))) {
     // Generate processing back trace to debug verifier
     LogVerifyInfo() << "Processing " << inst->DumpString(dex_file_) << "\n"
                     << work_line_->Dump(this) << "\n";
@@ -2223,7 +2239,7 @@
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-object not expected";
         } else {
           /* return_type is the *expected* return type, not register value */
-          DCHECK(!return_type.IsZero());
+          DCHECK(!return_type.IsZeroOrNull());
           DCHECK(!return_type.IsUninitializedReference());
           const uint32_t vregA = inst->VRegA_11x();
           const RegType& reg_type = work_line_->GetRegisterType(this, vregA);
@@ -2417,8 +2433,8 @@
       const RegType& res_type = ResolveClass<CheckAccess::kYes>(type_idx);
       if (res_type.IsConflict()) {
         // If this is a primitive type, fail HARD.
-        ObjPtr<mirror::Class> klass =
-            ClassLinker::LookupResolvedType(type_idx, dex_cache_.Get(), class_loader_.Get());
+        ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
+            type_idx, dex_cache_.Get(), class_loader_.Get());
         if (klass != nullptr && klass->IsPrimitive()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "using primitive type "
               << dex_file_->StringByTypeIdx(type_idx) << " in instanceof in "
@@ -2471,7 +2487,8 @@
     case Instruction::ARRAY_LENGTH: {
       const RegType& res_type = work_line_->GetRegisterType(this, inst->VRegB_12x());
       if (res_type.IsReferenceTypes()) {
-        if (!res_type.IsArrayTypes() && !res_type.IsZero()) {  // ie not an array or null
+        if (!res_type.IsArrayTypes() && !res_type.IsZeroOrNull()) {
+          // i.e. not an array or null
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "array-length on non-array " << res_type;
         } else {
           work_line_->SetRegisterType<LockOp::kClear>(this,
@@ -2578,7 +2595,7 @@
       /* Similar to the verification done for APUT */
       const RegType& array_type = work_line_->GetRegisterType(this, inst->VRegA_31t());
       /* array_type can be null if the reg type is Zero */
-      if (!array_type.IsZero()) {
+      if (!array_type.IsZeroOrNull()) {
         if (!array_type.IsArrayTypes()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid fill-array-data with array type "
                                             << array_type;
@@ -2618,7 +2635,7 @@
       const RegType& reg_type1 = work_line_->GetRegisterType(this, inst->VRegA_22t());
       const RegType& reg_type2 = work_line_->GetRegisterType(this, inst->VRegB_22t());
       bool mismatch = false;
-      if (reg_type1.IsZero()) {  // zero then integral or reference expected
+      if (reg_type1.IsZeroOrNull()) {  // zero then integral or reference expected
         mismatch = !reg_type2.IsReferenceTypes() && !reg_type2.IsIntegralTypes();
       } else if (reg_type1.IsReferenceTypes()) {  // both references?
         mismatch = !reg_type2.IsReferenceTypes();
@@ -2703,7 +2720,7 @@
             !cast_type.IsUnresolvedTypes() && !orig_type.IsUnresolvedTypes() &&
             cast_type.HasClass() &&             // Could be conflict type, make sure it has a class.
             !cast_type.GetClass()->IsInterface() &&
-            (orig_type.IsZero() ||
+            (orig_type.IsZeroOrNull() ||
                 orig_type.IsStrictlyAssignableFrom(
                     cast_type.Merge(orig_type, &reg_types_, this), this))) {
           RegisterLine* update_line = RegisterLine::Create(code_item_accessor_.RegistersSize(),
@@ -2991,7 +3008,7 @@
           break;
 
         /* no null refs allowed (?) */
-        if (this_type.IsZero()) {
+        if (this_type.IsZeroOrNull()) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unable to initialize null ref";
           break;
         }
@@ -3068,7 +3085,7 @@
        * interface or Object (see comments in RegType::JoinClass).
        */
       const RegType& this_type = work_line_->GetInvocationThis(this, inst);
-      if (this_type.IsZero()) {
+      if (this_type.IsZeroOrNull()) {
         /* null pointer always passes (and always fails at runtime) */
       } else {
         if (this_type.IsUninitializedTypes()) {
@@ -3624,8 +3641,8 @@
         has_catch_all_handler = true;
       } else {
         // It is also a catch-all if it is java.lang.Throwable.
-        mirror::Class* klass = linker->ResolveType(*dex_file_, handler_type_idx, dex_cache_,
-                                                   class_loader_);
+        ObjPtr<mirror::Class> klass =
+            linker->ResolveType(handler_type_idx, dex_cache_, class_loader_);
         if (klass != nullptr) {
           if (klass == mirror::Throwable::GetJavaLangThrowable()) {
             has_catch_all_handler = true;
@@ -3743,16 +3760,16 @@
                                            << "non-instantiable klass " << descriptor;
 }
 
-inline bool MethodVerifier::IsInstantiableOrPrimitive(mirror::Class* klass) {
+inline bool MethodVerifier::IsInstantiableOrPrimitive(ObjPtr<mirror::Class> klass) {
   return klass->IsInstantiable() || klass->IsPrimitive();
 }
 
 template <MethodVerifier::CheckAccess C>
 const RegType& MethodVerifier::ResolveClass(dex::TypeIndex class_idx) {
-  mirror::Class* klass = can_load_classes_
-      ? Runtime::Current()->GetClassLinker()->ResolveType(
-          *dex_file_, class_idx, dex_cache_, class_loader_)
-      : ClassLinker::LookupResolvedType(class_idx, dex_cache_.Get(), class_loader_.Get()).Ptr();
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  ObjPtr<mirror::Class> klass = can_load_classes_
+      ? linker->ResolveType(class_idx, dex_cache_, class_loader_)
+      : linker->LookupResolvedType(class_idx, dex_cache_.Get(), class_loader_.Get());
   if (can_load_classes_ && klass == nullptr) {
     DCHECK(self_->IsExceptionPending());
     self_->ClearException();
@@ -3765,10 +3782,10 @@
       UninstantiableError(descriptor);
       precise = false;
     }
-    result = reg_types_.FindClass(klass, precise);
+    result = reg_types_.FindClass(klass.Ptr(), precise);
     if (result == nullptr) {
       const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
-      result = reg_types_.InsertClass(descriptor, klass, precise);
+      result = reg_types_.InsertClass(descriptor, klass.Ptr(), precise);
     }
   } else {
     const char* descriptor = dex_file_->StringByTypeIdx(class_idx);
@@ -3783,7 +3800,7 @@
   }
 
   // Record result of class resolution attempt.
-  VerifierDeps::MaybeRecordClassResolution(*dex_file_, class_idx, klass);
+  VerifierDeps::MaybeRecordClassResolution(*dex_file_, class_idx, klass.Ptr());
 
   // If requested, check if access is allowed. Unresolved types are included in this check, as the
   // interpreter only tests whether access is allowed when a class is not pre-verified and runs in
@@ -4067,7 +4084,7 @@
     const RegType& adjusted_type = is_init
                                        ? GetRegTypeCache()->FromUninitialized(actual_arg_type)
                                        : actual_arg_type;
-    if (method_type != METHOD_INTERFACE && !adjusted_type.IsZero()) {
+    if (method_type != METHOD_INTERFACE && !adjusted_type.IsZeroOrNull()) {
       const RegType* res_method_class;
       // Miranda methods have the declaring interface as their declaring class, not the abstract
       // class. It would be wrong to use this for the type check (interface type checks are
@@ -4440,7 +4457,7 @@
 
 bool MethodVerifier::CheckSignaturePolymorphicReceiver(const Instruction* inst) {
   const RegType& this_type = work_line_->GetInvocationThis(this, inst);
-  if (this_type.IsZero()) {
+  if (this_type.IsZeroOrNull()) {
     /* null pointer always passes (and always fails at run time) */
     return true;
   } else if (!this_type.IsNonZeroReferenceTypes()) {
@@ -4559,7 +4576,7 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "'this' arg must be initialized";
     return nullptr;
   }
-  if (!actual_arg_type.IsZero()) {
+  if (!actual_arg_type.IsZeroOrNull()) {
     mirror::Class* klass = res_method->GetDeclaringClass();
     std::string temp;
     const RegType& res_method_class =
@@ -4675,13 +4692,20 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Invalid reg type for array index (" << index_type << ")";
   } else {
     const RegType& array_type = work_line_->GetRegisterType(this, inst->VRegB_23x());
-    if (array_type.IsZero()) {
-      have_pending_runtime_throw_failure_ = true;
+    if (array_type.IsZeroOrNull()) {
       // Null array class; this code path will fail at runtime. Infer a merge-able type from the
-      // instruction type. TODO: have a proper notion of bottom here.
-      if (!is_primitive || insn_type.IsCategory1Types()) {
-        // Reference or category 1
-        work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_23x(), reg_types_.Zero());
+      // instruction type.
+      if (!is_primitive) {
+        work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_23x(), reg_types_.Null());
+      } else if (insn_type.IsInteger()) {
+        // Pick a non-zero constant (to distinguish it from null) that can fit in any primitive.
+        // We cannot use 'insn_type' as it could be a float array or an int array.
+        work_line_->SetRegisterType<LockOp::kClear>(
+            this, inst->VRegA_23x(), DetermineCat1Constant(1, need_precise_constants_));
+      } else if (insn_type.IsCategory1Types()) {
+        // Category 1
+        // The 'insn_type' is exactly the type we need.
+        work_line_->SetRegisterType<LockOp::kClear>(this, inst->VRegA_23x(), insn_type);
       } else {
         // Category 2
         work_line_->SetRegisterTypeWide(this, inst->VRegA_23x(),
@@ -4790,7 +4814,7 @@
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Invalid reg type for array index (" << index_type << ")";
   } else {
     const RegType& array_type = work_line_->GetRegisterType(this, inst->VRegB_23x());
-    if (array_type.IsZero()) {
+    if (array_type.IsZeroOrNull()) {
       // Null array type; this code path will fail at runtime.
       // Still check that the given value matches the instruction's type.
       // Note: this is, as usual, complicated by the fact the the instruction isn't fully typed
@@ -4859,7 +4883,7 @@
     return nullptr;  // Can't resolve Class so no more to do here, will do checking at runtime.
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_, class_loader_);
+  ArtField* field = class_linker->ResolveFieldJLS(field_idx, dex_cache_, class_loader_);
 
   // Record result of the field resolution attempt.
   VerifierDeps::MaybeRecordFieldResolution(*dex_file_, field_idx, field);
@@ -4900,7 +4924,7 @@
     return nullptr;  // Can't resolve Class so no more to do here
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_, class_loader_);
+  ArtField* field = class_linker->ResolveFieldJLS(field_idx, dex_cache_, class_loader_);
 
   // Record result of the field resolution attempt.
   VerifierDeps::MaybeRecordFieldResolution(*dex_file_, field_idx, field);
@@ -4912,7 +4936,7 @@
     DCHECK(self_->IsExceptionPending());
     self_->ClearException();
     return nullptr;
-  } else if (obj_type.IsZero()) {
+  } else if (obj_type.IsZeroOrNull()) {
     // Cannot infer and check type, however, access will cause null pointer exception.
     // Fall through into a few last soft failure checks below.
   } else if (!obj_type.IsReferenceTypes()) {
@@ -5024,7 +5048,7 @@
     }
 
     ObjPtr<mirror::Class> field_type_class =
-        can_load_classes_ ? field->ResolveType() : field->LookupType();
+        can_load_classes_ ? field->ResolveType() : field->LookupResolvedType();
     if (field_type_class != nullptr) {
       field_type = &FromClass(field->GetTypeDescriptor(),
                               field_type_class.Ptr(),
@@ -5140,10 +5164,11 @@
   }
   uint32_t field_offset = static_cast<uint32_t>(inst->VRegC_22c());
   ArtField* const f = ArtField::FindInstanceFieldWithOffset(object_type.GetClass(), field_offset);
-  DCHECK_EQ(f->GetOffset().Uint32Value(), field_offset);
   if (f == nullptr) {
     VLOG(verifier) << "Failed to find instance field at offset '" << field_offset
                    << "' from '" << mirror::Class::PrettyDescriptor(object_type.GetClass()) << "'";
+  } else {
+    DCHECK_EQ(f->GetOffset().Uint32Value(), field_offset);
   }
   return f;
 }
@@ -5172,7 +5197,7 @@
   const RegType* field_type;
   {
     ObjPtr<mirror::Class> field_type_class =
-        can_load_classes_ ? field->ResolveType() : field->LookupType();
+        can_load_classes_ ? field->ResolveType() : field->LookupResolvedType();
 
     if (field_type_class != nullptr) {
       field_type = &FromClass(field->GetTypeDescriptor(),
@@ -5327,7 +5352,7 @@
     }
   } else {
     RegisterLineArenaUniquePtr copy;
-    if (kDebugVerify) {
+    if (UNLIKELY(VLOG_IS_ON(verifier_debug))) {
       copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
     }
@@ -5335,7 +5360,7 @@
     if (have_pending_hard_failure_) {
       return false;
     }
-    if (kDebugVerify && changed) {
+    if (UNLIKELY(VLOG_IS_ON(verifier_debug)) && changed) {
       LogVerifyInfo() << "Merging at [" << reinterpret_cast<void*>(work_insn_idx_) << "]"
                       << " to [" << reinterpret_cast<void*>(next_insn) << "]: " << "\n"
                       << copy->Dump(this) << "  MERGE\n"
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 813ce87..f26f3e2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -149,10 +149,21 @@
   void Dump(std::ostream& os) REQUIRES_SHARED(Locks::mutator_lock_);
   void Dump(VariableIndentationOutputStream* vios) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Describes one lock that is held at a certain point in time.
+  struct DexLockInfo {
+    // Every register that aliases the lock.
+    std::set<uint32_t> dex_registers;
+    // Dex PC of the monitor-enter instruction for the lock.
+    uint32_t dex_pc;
+
+    // Records the dex PC; 'dex_registers' is default-constructed (empty).
+    explicit DexLockInfo(uint32_t dex_pc_in)
+        : dex_pc(dex_pc_in) {}
+  };
   // Fills 'monitor_enter_dex_pcs' with the dex pcs of the monitor-enter instructions corresponding
   // to the locks held at 'dex_pc' in method 'm'.
   static void FindLocksAtDexPc(ArtMethod* m, uint32_t dex_pc,
-                               std::vector<uint32_t>* monitor_enter_dex_pcs)
+                               std::vector<DexLockInfo>* monitor_enter_dex_pcs)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the accessed field corresponding to the quick instruction's field
@@ -246,7 +257,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void UninstantiableError(const char* descriptor);
-  static bool IsInstantiableOrPrimitive(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  static bool IsInstantiableOrPrimitive(ObjPtr<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Is the method being verified a constructor? See the comment on the field.
   bool IsConstructor() const {
@@ -750,7 +762,7 @@
   uint32_t interesting_dex_pc_;
   // The container into which FindLocksAtDexPc should write the registers containing held locks,
   // null if we're not doing FindLocksAtDexPc.
-  std::vector<uint32_t>* monitor_enter_dex_pcs_;
+  std::vector<DexLockInfo>* monitor_enter_dex_pcs_;
 
   // The types of any error that occurs.
   std::vector<VerifyError> failures_;
diff --git a/runtime/verifier/reg_type-inl.h b/runtime/verifier/reg_type-inl.h
index 631c6bd..f719782 100644
--- a/runtime/verifier/reg_type-inl.h
+++ b/runtime/verifier/reg_type-inl.h
@@ -29,6 +29,8 @@
 namespace verifier {
 
 inline bool RegType::CanAccess(const RegType& other) const {
+  DCHECK(IsReferenceTypes());
+  DCHECK(!IsNull());
   if (Equals(other)) {
     return true;  // Trivial accessibility.
   } else {
@@ -45,9 +47,13 @@
 }
 
 inline bool RegType::CanAccessMember(ObjPtr<mirror::Class> klass, uint32_t access_flags) const {
+  DCHECK(IsReferenceTypes());
   if ((access_flags & kAccPublic) != 0) {
     return true;
   }
+  if (IsNull()) {
+    return true;
+  }
   if (!IsUnresolvedTypes()) {
     return GetClass()->CanAccessMember(klass, access_flags);
   } else {
@@ -92,7 +98,7 @@
         LOG(WARNING) << "RegType::AssignableFrom lhs is Conflict!";
         return false;
       case AssignmentType::kReference:
-        if (rhs.IsZero()) {
+        if (rhs.IsZeroOrNull()) {
           return true;  // All reference types can be assigned null.
         } else if (!rhs.IsReferenceTypes()) {
           return false;  // Expect rhs to be a reference type.
@@ -119,6 +125,7 @@
           return result;
         } else {
           // Unresolved types are only assignable for null and equality.
+          // Null cannot be the left-hand side.
           return false;
         }
       case AssignmentType::kNotAssignable:
@@ -199,6 +206,11 @@
   return instance_;
 }
 
+inline const NullType* NullType::GetInstance() {
+  DCHECK(instance_ != nullptr);
+  return instance_;
+}
+
 inline void* RegType::operator new(size_t size, ScopedArenaAllocator* allocator) {
   return allocator->Alloc(size, kArenaAllocMisc);
 }
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 8df2e0f..309c374 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -51,6 +51,7 @@
 const DoubleLoType* DoubleLoType::instance_ = nullptr;
 const DoubleHiType* DoubleHiType::instance_ = nullptr;
 const IntegerType* IntegerType::instance_ = nullptr;
+const NullType* NullType::instance_ = nullptr;
 
 PrimitiveType::PrimitiveType(mirror::Class* klass, const StringPiece& descriptor, uint16_t cache_id)
     : RegType(klass, descriptor, cache_id) {
@@ -581,6 +582,10 @@
   return a.IsConstantTypes() ? b : a;
 }
 
+static const RegType& SelectNonConstant2(const RegType& a, const RegType& b) {
+  return (a.IsConstantTypes() && !b.IsZero()) ? b : a;  // Zero 'b' never replaces constant 'a'.
+}
+
 const RegType& RegType::Merge(const RegType& incoming_type,
                               RegTypeCache* reg_types,
                               MethodVerifier* verifier) const {
@@ -695,8 +700,8 @@
       // special. They may only ever be merged with themselves (must be taken care of by the
       // caller of Merge(), see the DCHECK on entry). So mark any other merge as conflicting here.
       return conflict;
-    } else if (IsZero() || incoming_type.IsZero()) {
-      return SelectNonConstant(*this, incoming_type);  // 0 MERGE ref => ref
+    } else if (IsZeroOrNull() || incoming_type.IsZeroOrNull()) {
+      return SelectNonConstant2(*this, incoming_type);  // 0 MERGE ref => ref
     } else if (IsJavaLangObject() || incoming_type.IsJavaLangObject()) {
       return reg_types->JavaLangObject(false);  // Object MERGE ref => Object
     } else if (IsUnresolvedTypes() || incoming_type.IsUnresolvedTypes()) {
@@ -965,6 +970,21 @@
   return cmp1.CanAssignArray(cmp2, reg_types, class_loader, verifier, soft_error);
 }
 
+const NullType* NullType::CreateInstance(mirror::Class* klass,
+                                         const StringPiece& descriptor,
+                                         uint16_t cache_id) {
+  CHECK(instance_ == nullptr);
+  instance_ = new NullType(klass, descriptor, cache_id);
+  return instance_;
+}
+
+void NullType::Destroy() {
+  if (NullType::instance_ != nullptr) {
+    delete instance_;
+    instance_ = nullptr;
+  }
+}
+
 
 }  // namespace verifier
 }  // namespace art
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index a2085a3..9055849 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -129,8 +129,12 @@
   virtual bool IsConstantShort() const { return false; }
   virtual bool IsOne() const { return false; }
   virtual bool IsZero() const { return false; }
+  virtual bool IsNull() const { return false; }
   bool IsReferenceTypes() const {
-    return IsNonZeroReferenceTypes() || IsZero();
+    return IsNonZeroReferenceTypes() || IsZero() || IsNull();
+  }
+  bool IsZeroOrNull() const {
+    return IsZero() || IsNull();
   }
   virtual bool IsNonZeroReferenceTypes() const { return false; }
   bool IsCategory1Types() const {
@@ -857,6 +861,46 @@
   }
 };
 
+// Special "null" type that captures the semantics of null / bottom.
+class NullType FINAL : public RegType {
+ public:
+  bool IsNull() const OVERRIDE {
+    return true;
+  }
+
+  // Get the singleton Null instance.
+  static const NullType* GetInstance() PURE;
+
+  // Create the singleton instance.
+  static const NullType* CreateInstance(mirror::Class* klass,
+                                        const StringPiece& descriptor,
+                                        uint16_t cache_id)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static void Destroy();
+
+  std::string Dump() const OVERRIDE {
+    return "null";
+  }
+
+  AssignmentType GetAssignmentTypeImpl() const OVERRIDE {
+    return AssignmentType::kReference;
+  }
+
+  bool IsConstantTypes() const OVERRIDE {
+    return true;
+  }
+
+ private:
+  NullType(mirror::Class* klass, const StringPiece& descriptor, uint16_t cache_id)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
+
+  static const NullType* instance_;
+};
+
 // Common parent of all uninitialized types. Uninitialized types are created by
 // "new" dex
 // instructions and must be passed to a constructor.
diff --git a/runtime/verifier/reg_type_cache-inl.h b/runtime/verifier/reg_type_cache-inl.h
index 197c976..61f34af 100644
--- a/runtime/verifier/reg_type_cache-inl.h
+++ b/runtime/verifier/reg_type_cache-inl.h
@@ -81,6 +81,9 @@
 inline const ConflictType& RegTypeCache::Conflict() {
   return *ConflictType::GetInstance();
 }
+inline const NullType& RegTypeCache::Null() {
+  return *NullType::GetInstance();
+}
 
 inline const ImpreciseConstType& RegTypeCache::ByteConstant() {
   const ConstantType& result = FromCat1Const(std::numeric_limits<jbyte>::min(), false);
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 0029eb9..c68fa0f 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -16,6 +16,9 @@
 
 #include "reg_type_cache-inl.h"
 
+#include <type_traits>
+
+#include "base/aborting.h"
 #include "base/arena_bit_vector.h"
 #include "base/bit_vector-inl.h"
 #include "base/casts.h"
@@ -51,8 +54,10 @@
 }
 
 void RegTypeCache::FillPrimitiveAndSmallConstantTypes() {
+  // Note: this must have the same order as CreatePrimitiveAndSmallConstantTypes.
   entries_.push_back(UndefinedType::GetInstance());
   entries_.push_back(ConflictType::GetInstance());
+  entries_.push_back(NullType::GetInstance());
   entries_.push_back(BooleanType::GetInstance());
   entries_.push_back(ByteType::GetInstance());
   entries_.push_back(ShortType::GetInstance());
@@ -304,6 +309,7 @@
     FloatType::Destroy();
     DoubleLoType::Destroy();
     DoubleHiType::Destroy();
+    NullType::Destroy();
     for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
       const PreciseConstType* type = small_precise_constants_[value - kMinSmallConstant];
       delete type;
@@ -314,33 +320,55 @@
   }
 }
 
-template <class Type>
-const Type* RegTypeCache::CreatePrimitiveTypeInstance(const std::string& descriptor) {
-  mirror::Class* klass = nullptr;
-  // Try loading the class from linker.
-  if (!descriptor.empty()) {
-    klass = art::Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
-                                                                       descriptor.c_str());
-    DCHECK(klass != nullptr);
-  }
-  const Type* entry = Type::CreateInstance(klass, descriptor, RegTypeCache::primitive_count_);
-  RegTypeCache::primitive_count_++;
-  return entry;
-}
+// Helper for create_primitive_type_instance lambda.
+namespace {
+template <typename T>
+struct TypeHelper {
+  using type = T;
+  static_assert(std::is_convertible<T*, RegType*>::value, "T must be a RegType");
+
+  const char* descriptor;
+
+  explicit TypeHelper(const char* d) : descriptor(d) {}
+};
+}  // namespace
 
 void RegTypeCache::CreatePrimitiveAndSmallConstantTypes() {
-  CreatePrimitiveTypeInstance<UndefinedType>("");
-  CreatePrimitiveTypeInstance<ConflictType>("");
-  CreatePrimitiveTypeInstance<BooleanType>("Z");
-  CreatePrimitiveTypeInstance<ByteType>("B");
-  CreatePrimitiveTypeInstance<ShortType>("S");
-  CreatePrimitiveTypeInstance<CharType>("C");
-  CreatePrimitiveTypeInstance<IntegerType>("I");
-  CreatePrimitiveTypeInstance<LongLoType>("J");
-  CreatePrimitiveTypeInstance<LongHiType>("J");
-  CreatePrimitiveTypeInstance<FloatType>("F");
-  CreatePrimitiveTypeInstance<DoubleLoType>("D");
-  CreatePrimitiveTypeInstance<DoubleHiType>("D");
+  // Note: this must have the same order as FillPrimitiveAndSmallConstantTypes.
+
+  // It is acceptable to pass on the const char* in type to CreateInstance, as all calls below are
+  // with compile-time constants that will have global lifetime. Use of the lambda ensures this
+  // code cannot leak to other users.
+  auto create_primitive_type_instance = [&](auto type) REQUIRES_SHARED(Locks::mutator_lock_) {
+    using Type = typename decltype(type)::type;
+    mirror::Class* klass = nullptr;
+    // Try loading the class from linker.
+    DCHECK(type.descriptor != nullptr);
+    if (strlen(type.descriptor) > 0) {
+      klass = art::Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
+                                                                         type.descriptor);
+      DCHECK(klass != nullptr);
+    }
+    const Type* entry = Type::CreateInstance(klass,
+                                             type.descriptor,
+                                             RegTypeCache::primitive_count_);
+    RegTypeCache::primitive_count_++;
+    return entry;
+  };
+  create_primitive_type_instance(TypeHelper<UndefinedType>(""));
+  create_primitive_type_instance(TypeHelper<ConflictType>(""));
+  create_primitive_type_instance(TypeHelper<NullType>(""));
+  create_primitive_type_instance(TypeHelper<BooleanType>("Z"));
+  create_primitive_type_instance(TypeHelper<ByteType>("B"));
+  create_primitive_type_instance(TypeHelper<ShortType>("S"));
+  create_primitive_type_instance(TypeHelper<CharType>("C"));
+  create_primitive_type_instance(TypeHelper<IntegerType>("I"));
+  create_primitive_type_instance(TypeHelper<LongLoType>("J"));
+  create_primitive_type_instance(TypeHelper<LongHiType>("J"));
+  create_primitive_type_instance(TypeHelper<FloatType>("F"));
+  create_primitive_type_instance(TypeHelper<DoubleLoType>("D"));
+  create_primitive_type_instance(TypeHelper<DoubleHiType>("D"));
+
   for (int32_t value = kMinSmallConstant; value <= kMaxSmallConstant; ++value) {
     PreciseConstType* type = new PreciseConstType(value, primitive_count_);
     small_precise_constants_[value - kMinSmallConstant] = type;
@@ -396,6 +424,9 @@
   if (resolved_parts_merged.IsConflict()) {
     return Conflict();
   }
+  if (resolved_parts_merged.IsJavaLangObject()) {
+    return resolved_parts_merged;
+  }
 
   bool resolved_merged_is_array = resolved_parts_merged.IsArrayTypes();
   if (left_unresolved_is_array || right_unresolved_is_array || resolved_merged_is_array) {
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index d090756..5277676 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -49,6 +49,7 @@
 class LongHiType;
 class LongLoType;
 class MethodVerifier;
+class NullType;
 class PreciseConstType;
 class PreciseReferenceType;
 class RegType;
@@ -123,6 +124,7 @@
   const DoubleHiType& DoubleHi() REQUIRES_SHARED(Locks::mutator_lock_);
   const UndefinedType& Undefined() REQUIRES_SHARED(Locks::mutator_lock_);
   const ConflictType& Conflict();
+  const NullType& Null();
 
   const PreciseReferenceType& JavaLangClass() REQUIRES_SHARED(Locks::mutator_lock_);
   const PreciseReferenceType& JavaLangString() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -171,9 +173,6 @@
   // verifier.
   StringPiece AddString(const StringPiece& string_piece);
 
-  template <class Type>
-  static const Type* CreatePrimitiveTypeInstance(const std::string& descriptor)
-      REQUIRES_SHARED(Locks::mutator_lock_);
   static void CreatePrimitiveAndSmallConstantTypes() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // A quick look up for popular small constants.
@@ -183,7 +182,7 @@
                                                           kMinSmallConstant + 1];
 
   static constexpr size_t kNumPrimitivesAndSmallConstants =
-      12 + (kMaxSmallConstant - kMinSmallConstant + 1);
+      13 + (kMaxSmallConstant - kMinSmallConstant + 1);
 
   // Have the well known global primitives been created?
   static bool primitive_initialized_;
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 1bc48ed..15a38f3 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -664,6 +664,368 @@
   }
 }
 
+TEST_F(RegTypeTest, MergeSemiLatticeRef) {
+  //  (Incomplete) semilattice:
+  //
+  //  Excluded for now: * category-2 types
+  //                    * interfaces
+  //                    * all of category-1 primitive types, including constants.
+  //  This is to demonstrate/codify the reference side, mostly.
+  //
+  //  Note: It is not a real semilattice because int = float makes this wonky. :-(
+  //
+  //                                       Conflict
+  //                                           |
+  //      #---------#--------------------------#-----------------------------#
+  //      |         |                                                        |
+  //      |         |                                                      Object
+  //      |         |                                                        |
+  //     int   uninit types              #---------------#--------#------------------#---------#
+  //      |                              |               |        |                  |         |
+  //      |                  unresolved-merge-types      |      Object[]           char[]   byte[]
+  //      |                              |    |  |       |        |                  |         |
+  //      |                  unresolved-types |  #------Number    #---------#        |         |
+  //      |                              |    |          |        |         |        |         |
+  //      |                              |    #--------Integer  Number[] Number[][]  |         |
+  //      |                              |               |        |         |        |         |
+  //      |                              #---------------#--------#---------#--------#---------#
+  //      |                                                       |
+  //      |                                                     null
+  //      |                                                       |
+  //      #--------------------------#----------------------------#
+  //                                 |
+  //                                 0
+
+  ArenaStack stack(Runtime::Current()->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
+  ScopedObjectAccess soa(Thread::Current());
+
+  // We cannot allow moving GC. Otherwise we'd have to ensure the reg types are updated (reference
+  // reg types store a class pointer in a GCRoot, which is normally updated through active verifiers
+  // being registered with their thread), which is unnecessarily complex.
+  Runtime::Current()->GetHeap()->IncrementDisableMovingGC(soa.Self());
+
+  RegTypeCache cache(true, allocator);
+
+  const RegType& conflict = cache.Conflict();
+  const RegType& zero = cache.Zero();
+  const RegType& null = cache.Null();
+  const RegType& int_type = cache.Integer();
+
+  const RegType& obj = cache.JavaLangObject(false);
+  const RegType& obj_arr = cache.From(nullptr, "[Ljava/lang/Object;", false);
+  ASSERT_FALSE(obj_arr.IsUnresolvedReference());
+
+  const RegType& unresolved_a = cache.From(nullptr, "Ldoes/not/resolve/A;", false);
+  ASSERT_TRUE(unresolved_a.IsUnresolvedReference());
+  const RegType& unresolved_b = cache.From(nullptr, "Ldoes/not/resolve/B;", false);
+  ASSERT_TRUE(unresolved_b.IsUnresolvedReference());
+  const RegType& unresolved_ab = cache.FromUnresolvedMerge(unresolved_a, unresolved_b, nullptr);
+  ASSERT_TRUE(unresolved_ab.IsUnresolvedMergedReference());
+
+  const RegType& uninit_this = cache.UninitializedThisArgument(obj);
+  const RegType& uninit_obj_0 = cache.Uninitialized(obj, 0u);
+  const RegType& uninit_obj_1 = cache.Uninitialized(obj, 1u);
+
+  const RegType& uninit_unres_this = cache.UninitializedThisArgument(unresolved_a);
+  const RegType& uninit_unres_a_0 = cache.Uninitialized(unresolved_a, 0);
+  const RegType& uninit_unres_b_0 = cache.Uninitialized(unresolved_b, 0);
+
+  const RegType& number = cache.From(nullptr, "Ljava/lang/Number;", false);
+  ASSERT_FALSE(number.IsUnresolvedReference());
+  const RegType& integer = cache.From(nullptr, "Ljava/lang/Integer;", false);
+  ASSERT_FALSE(integer.IsUnresolvedReference());
+
+  const RegType& uninit_number_0 = cache.Uninitialized(number, 0u);
+  const RegType& uninit_integer_0 = cache.Uninitialized(integer, 0u);
+
+  const RegType& number_arr = cache.From(nullptr, "[Ljava/lang/Number;", false);
+  ASSERT_FALSE(number_arr.IsUnresolvedReference());
+  const RegType& integer_arr = cache.From(nullptr, "[Ljava/lang/Integer;", false);
+  ASSERT_FALSE(integer_arr.IsUnresolvedReference());
+
+  const RegType& number_arr_arr = cache.From(nullptr, "[[Ljava/lang/Number;", false);
+  ASSERT_FALSE(number_arr_arr.IsUnresolvedReference());
+
+  const RegType& char_arr = cache.From(nullptr, "[C", false);
+  ASSERT_FALSE(char_arr.IsUnresolvedReference());
+  const RegType& byte_arr = cache.From(nullptr, "[B", false);
+  ASSERT_FALSE(byte_arr.IsUnresolvedReference());
+
+  const RegType& unresolved_a_num = cache.FromUnresolvedMerge(unresolved_a, number, nullptr);
+  ASSERT_TRUE(unresolved_a_num.IsUnresolvedMergedReference());
+  const RegType& unresolved_b_num = cache.FromUnresolvedMerge(unresolved_b, number, nullptr);
+  ASSERT_TRUE(unresolved_b_num.IsUnresolvedMergedReference());
+  const RegType& unresolved_ab_num = cache.FromUnresolvedMerge(unresolved_ab, number, nullptr);
+  ASSERT_TRUE(unresolved_ab_num.IsUnresolvedMergedReference());
+
+  const RegType& unresolved_a_int = cache.FromUnresolvedMerge(unresolved_a, integer, nullptr);
+  ASSERT_TRUE(unresolved_a_int.IsUnresolvedMergedReference());
+  const RegType& unresolved_b_int = cache.FromUnresolvedMerge(unresolved_b, integer, nullptr);
+  ASSERT_TRUE(unresolved_b_int.IsUnresolvedMergedReference());
+  const RegType& unresolved_ab_int = cache.FromUnresolvedMerge(unresolved_ab, integer, nullptr);
+  ASSERT_TRUE(unresolved_ab_int.IsUnresolvedMergedReference());
+  std::vector<const RegType*> uninitialized_types = {
+      &uninit_this, &uninit_obj_0, &uninit_obj_1, &uninit_number_0, &uninit_integer_0
+  };
+  std::vector<const RegType*> unresolved_types = {
+      &unresolved_a,
+      &unresolved_b,
+      &unresolved_ab,
+      &unresolved_a_num,
+      &unresolved_b_num,
+      &unresolved_ab_num,
+      &unresolved_a_int,
+      &unresolved_b_int,
+      &unresolved_ab_int
+  };
+  std::vector<const RegType*> uninit_unresolved_types = {
+      &uninit_unres_this, &uninit_unres_a_0, &uninit_unres_b_0
+  };
+  std::vector<const RegType*> plain_nonobj_classes = { &number, &integer };
+  std::vector<const RegType*> plain_nonobj_arr_classes = {
+      &number_arr,
+      &number_arr_arr,
+      &integer_arr,
+      &char_arr,
+  };
+  // std::vector<const RegType*> others = { &conflict, &zero, &null, &obj, &int_type };
+
+  std::vector<const RegType*> all_minus_uninit_conflict;
+  all_minus_uninit_conflict.insert(all_minus_uninit_conflict.end(),
+                                   unresolved_types.begin(),
+                                   unresolved_types.end());
+  all_minus_uninit_conflict.insert(all_minus_uninit_conflict.end(),
+                                   plain_nonobj_classes.begin(),
+                                   plain_nonobj_classes.end());
+  all_minus_uninit_conflict.insert(all_minus_uninit_conflict.end(),
+                                   plain_nonobj_arr_classes.begin(),
+                                   plain_nonobj_arr_classes.end());
+  all_minus_uninit_conflict.push_back(&zero);
+  all_minus_uninit_conflict.push_back(&null);
+  all_minus_uninit_conflict.push_back(&obj);
+
+  std::vector<const RegType*> all_minus_uninit;
+  all_minus_uninit.insert(all_minus_uninit.end(),
+                          all_minus_uninit_conflict.begin(),
+                          all_minus_uninit_conflict.end());
+  all_minus_uninit.push_back(&conflict);
+
+
+  std::vector<const RegType*> all;
+  all.insert(all.end(), uninitialized_types.begin(), uninitialized_types.end());
+  all.insert(all.end(), uninit_unresolved_types.begin(), uninit_unresolved_types.end());
+  all.insert(all.end(), all_minus_uninit.begin(), all_minus_uninit.end());
+  all.push_back(&int_type);
+
+  auto check = [&](const RegType& in1, const RegType& in2, const RegType& expected_out)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    const RegType& merge_result = in1.SafeMerge(in2, &cache, nullptr);
+    EXPECT_EQ(&expected_out, &merge_result)
+        << in1.Dump() << " x " << in2.Dump() << " = " << merge_result.Dump()
+        << " != " << expected_out.Dump();
+  };
+
+  // Identity.
+  {
+    for (auto r : all) {
+      check(*r, *r, *r);
+    }
+  }
+
+  // Define a covering relation through a list of Edges. We'll then derive LUBs from this and
+  // create checks for every pair of types.
+
+  struct Edge {
+    const RegType& from;
+    const RegType& to;
+
+    Edge(const RegType& from_, const RegType& to_) : from(from_), to(to_) {}
+  };
+  std::vector<Edge> edges;
+#define ADD_EDGE(from, to) edges.emplace_back((from), (to))
+
+  // To Conflict.
+  {
+    for (auto r : uninitialized_types) {
+      ADD_EDGE(*r, conflict);
+    }
+    for (auto r : uninit_unresolved_types) {
+      ADD_EDGE(*r, conflict);
+    }
+    ADD_EDGE(obj, conflict);
+    ADD_EDGE(int_type, conflict);
+  }
+
+  ADD_EDGE(zero, null);
+
+  // Unresolved.
+  {
+    ADD_EDGE(null, unresolved_a);
+    ADD_EDGE(null, unresolved_b);
+    ADD_EDGE(unresolved_a, unresolved_ab);
+    ADD_EDGE(unresolved_b, unresolved_ab);
+
+    ADD_EDGE(number, unresolved_a_num);
+    ADD_EDGE(unresolved_a, unresolved_a_num);
+    ADD_EDGE(number, unresolved_b_num);
+    ADD_EDGE(unresolved_b, unresolved_b_num);
+    ADD_EDGE(number, unresolved_ab_num);
+    ADD_EDGE(unresolved_a_num, unresolved_ab_num);
+    ADD_EDGE(unresolved_b_num, unresolved_ab_num);
+    ADD_EDGE(unresolved_ab, unresolved_ab_num);
+
+    ADD_EDGE(integer, unresolved_a_int);
+    ADD_EDGE(unresolved_a, unresolved_a_int);
+    ADD_EDGE(integer, unresolved_b_int);
+    ADD_EDGE(unresolved_b, unresolved_b_int);
+    ADD_EDGE(integer, unresolved_ab_int);
+    ADD_EDGE(unresolved_a_int, unresolved_ab_int);
+    ADD_EDGE(unresolved_b_int, unresolved_ab_int);
+    ADD_EDGE(unresolved_ab, unresolved_ab_int);
+
+    ADD_EDGE(unresolved_a_int, unresolved_a_num);
+    ADD_EDGE(unresolved_b_int, unresolved_b_num);
+    ADD_EDGE(unresolved_ab_int, unresolved_ab_num);
+
+    ADD_EDGE(unresolved_ab_num, obj);
+  }
+
+  // Classes.
+  {
+    ADD_EDGE(null, integer);
+    ADD_EDGE(integer, number);
+    ADD_EDGE(number, obj);
+  }
+
+  // Arrays.
+  {
+    ADD_EDGE(integer_arr, number_arr);
+    ADD_EDGE(number_arr, obj_arr);
+    ADD_EDGE(obj_arr, obj);
+    ADD_EDGE(number_arr_arr, obj_arr);
+
+    ADD_EDGE(char_arr, obj);
+    ADD_EDGE(byte_arr, obj);
+
+    ADD_EDGE(null, integer_arr);
+    ADD_EDGE(null, number_arr_arr);
+    ADD_EDGE(null, char_arr);
+    ADD_EDGE(null, byte_arr);
+  }
+
+  // Primitive.
+  {
+    ADD_EDGE(zero, int_type);
+  }
+#undef ADD_EDGE
+
+  // Create merge triples by using the covering relation established by edges to derive the
+  // expected merge for any pair of types.
+
+  // Expect merge(in1, in2) == out.
+  struct MergeExpectation {
+    const RegType& in1;
+    const RegType& in2;
+    const RegType& out;
+
+    MergeExpectation(const RegType& in1_, const RegType& in2_, const RegType& out_)
+        : in1(in1_), in2(in2_), out(out_) {}
+  };
+  std::vector<MergeExpectation> expectations;
+
+  for (auto r1 : all) {
+    for (auto r2 : all) {
+      if (r1 == r2) {
+        continue;
+      }
+
+      // Very simple algorithm here that is usually used with adjacency lists. Our graph is
+      // small, it didn't make sense to have lists per node. Thus, the regular guarantees
+      // of O(n + |e|) don't apply, but that is acceptable.
+      //
+      // To compute r1 lub r2 = merge(r1, r2):
+      //   1) Generate the reachable set of r1, name it grey.
+      //   2) Mark all grey reachable nodes of r2 as black.
+      //   3) Find black nodes with no in-edges from other black nodes.
+      //   4) If |3)| == 1, that's the lub.
+
+      // Generic BFS of the graph induced by edges, starting at start. new_node will be called
+      // with any discovered node, in order.
+      auto bfs = [&](auto new_node, const RegType* start) {
+        std::unordered_set<const RegType*> seen;
+        std::queue<const RegType*> work_list;
+        work_list.push(start);
+        while (!work_list.empty()) {
+          const RegType* cur = work_list.front();
+          work_list.pop();
+          auto it = seen.find(cur);
+          if (it != seen.end()) {
+            continue;
+          }
+          seen.insert(cur);
+          new_node(cur);
+
+          for (const Edge& edge : edges) {
+            if (&edge.from == cur) {
+              work_list.push(&edge.to);
+            }
+          }
+        }
+      };
+
+      std::unordered_set<const RegType*> grey;
+      auto compute_grey = [&](const RegType* cur) {
+        grey.insert(cur);  // Mark discovered node as grey.
+      };
+      bfs(compute_grey, r1);
+
+      std::set<const RegType*> black;
+      auto compute_black = [&](const RegType* cur) {
+        // Mark discovered grey node as black.
+        if (grey.find(cur) != grey.end()) {
+          black.insert(cur);
+        }
+      };
+      bfs(compute_black, r2);
+
+      std::set<const RegType*> no_in_edge(black);  // Copy of black, remove nodes with in-edges.
+      for (auto r : black) {
+        for (Edge& e : edges) {
+          if (&e.from == r) {
+            no_in_edge.erase(&e.to);  // It doesn't matter whether "to" is black or not, just
+                                      // attempt to remove it.
+          }
+        }
+      }
+
+      // Helper to print sets when something went wrong.
+      auto print_set = [](auto& container) REQUIRES_SHARED(Locks::mutator_lock_) {
+        std::string result;
+        for (auto r : container) {
+          result.append(" + ");
+          result.append(r->Dump());
+        }
+        return result;
+      };
+      ASSERT_EQ(no_in_edge.size(), 1u) << r1->Dump() << " u " << r2->Dump()
+                                       << " grey=" << print_set(grey)
+                                       << " black=" << print_set(black)
+                                       << " no-in-edge=" << print_set(no_in_edge);
+      expectations.emplace_back(*r1, *r2, **no_in_edge.begin());
+    }
+  }
+
+  // Evaluate merge expectations. The merge is expected to be commutative.
+
+  for (auto& triple : expectations) {
+    check(triple.in1, triple.in2, triple.out);
+    check(triple.in2, triple.in1, triple.out);
+  }
+
+  Runtime::Current()->GetHeap()->DecrementDisableMovingGC(soa.Self());
+}
+
 TEST_F(RegTypeTest, ConstPrecision) {
   // Tests creating primitive types types.
   ArenaStack stack(Runtime::Current()->GetArenaPool());
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index a9c9428..39d73f5 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -19,6 +19,7 @@
 
 #include "register_line.h"
 
+#include "base/logging.h"  // For VLOG.
 #include "method_verifier.h"
 #include "reg_type_cache-inl.h"
 
@@ -192,6 +193,27 @@
   SetResultTypeToUnknown(verifier);
 }
 
+// Clears lock depth `depth` for `reg` and for every other register aliasing that same lock.
+inline void RegisterLine::ClearRegToLockDepth(size_t reg, size_t depth) {
+  CHECK_LT(depth, 32u);
+  DCHECK(IsSetLockDepth(reg, depth));
+  // Unsigned literal: depth may be 31, which would shift a signed 1 into the sign bit.
+  const uint32_t mask = 1u << depth;
+  auto it = reg_to_lock_depths_.find(reg);
+  DCHECK(it != reg_to_lock_depths_.end());
+  it->second ^= mask;
+  if (it->second == 0) {
+    reg_to_lock_depths_.erase(it);  // No depths left for reg: drop its entry.
+  }
+  // Need to unlock every register at the same lock depth. These are aliased locks.
+  for (auto& pair : reg_to_lock_depths_) {
+    if ((pair.second & mask) != 0) {
+      VLOG(verifier) << "Also unlocking " << pair.first;
+      pair.second ^= mask;
+    }
+  }
+}
+
 inline void RegisterLineArenaDelete::operator()(RegisterLine* ptr) const {
   if (ptr != nullptr) {
     ptr->~RegisterLine();
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 221aa80..82f63b2 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -20,6 +20,8 @@
 #include <memory>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "base/scoped_arena_containers.h"
 #include "safe_map.h"
 
@@ -353,6 +355,23 @@
     return monitors_[i];
   }
 
+  // Invokes fn(reg, depth) for every (register, lock depth) pair recorded in the lock depth
+  // map. Exposed so that FindLocksAtDexPC does not need an expensive poll loop.
+  template <typename T>
+  void IterateRegToLockDepths(T fn) const {
+    for (const auto& entry : reg_to_lock_depths_) {
+      const uint32_t vreg = entry.first;
+      // Each set bit i of the mask stands for lock depth i; scan from the lowest bit and
+      // stop as soon as no set bits remain.
+      uint32_t level = 0;
+      for (uint32_t bits = entry.second; bits != 0; bits >>= 1, ++level) {
+        if ((bits & 1u) != 0) {
+          fn(vreg, level);
+        }
+      }
+    }
+  }
+
  private:
   void CopyRegToLockDepth(size_t dst, size_t src) {
     auto it = reg_to_lock_depths_.find(src);
@@ -384,26 +403,7 @@
     return true;
   }
 
-  void ClearRegToLockDepth(size_t reg, size_t depth) {
-    CHECK_LT(depth, 32u);
-    DCHECK(IsSetLockDepth(reg, depth));
-    auto it = reg_to_lock_depths_.find(reg);
-    DCHECK(it != reg_to_lock_depths_.end());
-    uint32_t depths = it->second ^ (1 << depth);
-    if (depths != 0) {
-      it->second = depths;
-    } else {
-      reg_to_lock_depths_.erase(it);
-    }
-    // Need to unlock every register at the same lock depth. These are aliased locks.
-    uint32_t mask = 1 << depth;
-    for (auto& pair : reg_to_lock_depths_) {
-      if ((pair.second & mask) != 0) {
-        VLOG(verifier) << "Also unlocking " << pair.first;
-        pair.second ^= mask;
-      }
-    }
-  }
+  void ClearRegToLockDepth(size_t reg, size_t depth);
 
   void ClearAllRegToLockDepths(size_t reg) {
     reg_to_lock_depths_.erase(reg);
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 9722db9..5a653fe 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -20,9 +20,9 @@
 
 #include <sstream>
 
-#include "android-base/stringprintf.h"
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 
-#include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
diff --git a/runtime/zip_archive.h b/runtime/zip_archive.h
index 821cc5c..75f8757 100644
--- a/runtime/zip_archive.h
+++ b/runtime/zip_archive.h
@@ -21,7 +21,8 @@
 #include <memory>
 #include <string>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/unix_file/random_access_file.h"
 #include "globals.h"
 #include "mem_map.h"
diff --git a/simulator/code_simulator_arm64.cc b/simulator/code_simulator_arm64.cc
index 939d2e2..a64bd0b 100644
--- a/simulator/code_simulator_arm64.cc
+++ b/simulator/code_simulator_arm64.cc
@@ -16,7 +16,7 @@
 
 #include "code_simulator_arm64.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
 
diff --git a/simulator/code_simulator_container.cc b/simulator/code_simulator_container.cc
index a5f05dc..9f52b32 100644
--- a/simulator/code_simulator_container.cc
+++ b/simulator/code_simulator_container.cc
@@ -18,6 +18,7 @@
 
 #include "code_simulator_container.h"
 
+#include "base/logging.h"  // For VLOG.
 #include "code_simulator.h"
 #include "globals.h"
 
diff --git a/simulator/code_simulator_container.h b/simulator/code_simulator_container.h
index 31a915e..a219715 100644
--- a/simulator/code_simulator_container.h
+++ b/simulator/code_simulator_container.h
@@ -17,8 +17,9 @@
 #ifndef ART_SIMULATOR_CODE_SIMULATOR_CONTAINER_H_
 #define ART_SIMULATOR_CODE_SIMULATOR_CONTAINER_H_
 
+#include <android-base/logging.h>
+
 #include "arch/instruction_set.h"
-#include "base/logging.h"
 
 namespace art {
 
diff --git a/test/004-JniTest/expected.txt b/test/004-JniTest/expected.txt
index 1d05160..b09b9a2 100644
--- a/test/004-JniTest/expected.txt
+++ b/test/004-JniTest/expected.txt
@@ -1,4 +1,5 @@
 JNI_OnLoad called
+ABC.XYZ = 12, GetStaticIntField(DEF.class, 'XYZ') = 12
 Super.<init>
 Super.<init>
 Subclass.<init>
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index bc5a0a6..33a8f5b 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -20,8 +20,10 @@
 #include <iostream>
 #include <vector>
 
+#include <android-base/logging.h>
+
 #include "art_method-inl.h"
-#include "base/logging.h"
+#include "base/runtime_debug.h"
 #include "jni.h"
 
 namespace art {
@@ -88,6 +90,14 @@
   CHECK(!env->ExceptionCheck());
 }
 
+// Reads the static int field behind reflected Field `f_obj`, passing `sub` as the class.
+extern "C" JNIEXPORT jint JNICALL Java_Main_getFieldSubclass(
+    JNIEnv* env, jclass, jobject f_obj, jclass sub) {
+  // Convert the java.lang.reflect.Field object into a JNI field ID first.
+  const jfieldID field_id = env->FromReflectedField(f_obj);
+  return env->GetStaticIntField(sub, field_id);
+}
+
 // http://b/10994325
 extern "C" JNIEXPORT void JNICALL Java_Main_testFindClassOnAttachedNativeThread(JNIEnv*, jclass) {
   PthreadHelper(&testFindClassOnAttachedNativeThread);
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index 871107c..f94dcf6 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -18,6 +18,7 @@
 import java.lang.reflect.InvocationHandler;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.lang.reflect.Field;
 import java.lang.reflect.Proxy;
 import java.util.regex.Pattern;
 
@@ -32,6 +33,7 @@
           throw new RuntimeException("Slow-debug flags unexpectedly off.");
         }
 
+        testFieldSubclass();
         testFindClassOnAttachedNativeThread();
         testFindFieldOnAttachedNativeThread();
         testReflectFieldGetFromAttachedNativeThreadNative();
@@ -65,6 +67,19 @@
         testDoubleLoad(args[0]);
     }
 
+    static class ABC { public static int XYZ = 12; }
+    static class DEF extends ABC {}
+    public static void testFieldSubclass() {  // Prints XYZ read directly and natively via DEF.
+      try {
+        System.out.println("ABC.XYZ = " + ABC.XYZ + ", GetStaticIntField(DEF.class, 'XYZ') = " +
+            getFieldSubclass(ABC.class.getDeclaredField("XYZ"), DEF.class));  // Field from ABC.
+      } catch (Exception e) {
+        throw new RuntimeException("Failed to test get static field on a subclass", e);
+      }
+    }
+
+    public static native int getFieldSubclass(Field f, Class sub);
+
     private static native boolean registerNativesJniTest();
 
     private static native void testCallDefaultMethods();
diff --git a/test/044-proxy/native_proxy.cc b/test/044-proxy/native_proxy.cc
index f168719..f3178f9 100644
--- a/test/044-proxy/native_proxy.cc
+++ b/test/044-proxy/native_proxy.cc
@@ -16,7 +16,7 @@
 
 #include "jni.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
 
 namespace art {
 
diff --git a/test/071-dexfile-get-static-size/build b/test/071-dexfile-get-static-size/build
new file mode 100755
index 0000000..0bba66d
--- /dev/null
+++ b/test/071-dexfile-get-static-size/build
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@"
+
+# Create and add as resources to the test jar file:
+# 1. test1.dex
+# 2. test2.dex
+# 3. test-jar.jar, containing test1.dex as classes.dex
+# 4. multi-jar.jar, containing test1.dex as classes.dex and test2.dex as classes2.dex
+mkdir -p test-jar  # -p: do not fail if the staging directory already exists.
+cp test1.dex test-jar/classes.dex
+cp test2.dex test-jar/classes2.dex
+zip -j test-jar.jar test-jar/classes.dex
+zip -j multi-jar.jar test-jar/classes.dex test-jar/classes2.dex
+jar uf "${TEST_NAME}.jar" test1.dex test2.dex test-jar.jar multi-jar.jar
+
diff --git a/test/071-dexfile-get-static-size/expected.txt b/test/071-dexfile-get-static-size/expected.txt
new file mode 100644
index 0000000..dfb77c3
--- /dev/null
+++ b/test/071-dexfile-get-static-size/expected.txt
@@ -0,0 +1,4 @@
+Size for test1.dex: 1864
+Size for test2.dex: 1264
+Size for test-jar.jar: 1864
+Size for multi-jar.jar: 3128
diff --git a/test/071-dexfile-get-static-size/info.txt b/test/071-dexfile-get-static-size/info.txt
new file mode 100644
index 0000000..5b528e8
--- /dev/null
+++ b/test/071-dexfile-get-static-size/info.txt
@@ -0,0 +1,3 @@
+Test DexFile.getStaticSizeOfDexFile API.
+
+test1.dex and test2.dex are arbitrary valid dex files.
diff --git a/test/071-dexfile-get-static-size/src/Main.java b/test/071-dexfile-get-static-size/src/Main.java
new file mode 100644
index 0000000..4bf4538
--- /dev/null
+++ b/test/071-dexfile-get-static-size/src/Main.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.FileOutputStream;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+public class Main {
+    // Copies classpath resource `resource` to `filename`, closing both streams even on failure.
+    private static void extractResource(String resource, String filename) throws Exception {
+        ClassLoader loader = Main.class.getClassLoader();
+        try (InputStream is = loader.getResourceAsStream(resource);
+             OutputStream os = new FileOutputStream(filename)) {
+            int read;
+            byte[] buf = new byte[4096];
+            while ((read = is.read(buf)) >= 0) {
+                os.write(buf, 0, read);
+            }
+        }
+    }
+
+    // Loads `filename` through dalvik.system.DexFile (looked up reflectively) and returns the
+    // result of calling getStaticSizeOfDexFile() on the loaded DexFile object.
+    private static long getDexFileSize(String filename) throws Exception {
+        ClassLoader loader = Main.class.getClassLoader();
+        Class<?> DexFile = loader.loadClass("dalvik.system.DexFile");
+        Method DexFile_loadDex =
+                DexFile.getMethod("loadDex", String.class, String.class, Integer.TYPE);
+        Method DexFile_getStaticSizeOfDexFile = DexFile.getMethod("getStaticSizeOfDexFile");
+        Object dexFile = DexFile_loadDex.invoke(null, filename, null, 0);
+        return (Long) DexFile_getStaticSizeOfDexFile.invoke(dexFile);
+    }
+
+    // Extracts `resource` under $DEX_LOCATION and prints the size DexFile reports for it.
+    private static void test(String resource) throws Exception {
+        String filename = System.getenv("DEX_LOCATION") + "/" + resource;
+        extractResource(resource, filename);
+        System.out.println("Size for " + resource + ": " + getDexFileSize(filename));
+    }
+
+    public static void main(String[] args) throws Exception {
+        test("test1.dex");
+        test("test2.dex");
+        test("test-jar.jar");
+        test("multi-jar.jar");
+    }
+}
diff --git a/test/071-dexfile-get-static-size/test1.dex b/test/071-dexfile-get-static-size/test1.dex
new file mode 100644
index 0000000..84602d0
--- /dev/null
+++ b/test/071-dexfile-get-static-size/test1.dex
Binary files differ
diff --git a/test/071-dexfile-get-static-size/test2.dex b/test/071-dexfile-get-static-size/test2.dex
new file mode 100644
index 0000000..a07c46e
--- /dev/null
+++ b/test/071-dexfile-get-static-size/test2.dex
Binary files differ
diff --git a/test/099-vmdebug/expected.txt b/test/099-vmdebug/expected.txt
index b8d72f6..f7801de 100644
--- a/test/099-vmdebug/expected.txt
+++ b/test/099-vmdebug/expected.txt
@@ -23,3 +23,9 @@
 Instances of ClassA assignable 3
 Array counts [2, 1, 0]
 Array counts assignable [3, 1, 0]
+ClassD got 3, combined mask: 13
+ClassE got 2, combined mask: 18
+null got 0
+ClassD assignable got 5, combined mask: 31
+ClassE assignable got 2, combined mask: 18
+null assignable got 0
diff --git a/test/099-vmdebug/info.txt b/test/099-vmdebug/info.txt
index 7f88086..873429e 100644
--- a/test/099-vmdebug/info.txt
+++ b/test/099-vmdebug/info.txt
@@ -1 +1 @@
-Tests of private dalvik.system.VMDebug support for method tracing.
+Tests of dalvik.system.VMDebug APIs.
diff --git a/test/099-vmdebug/src/Main.java b/test/099-vmdebug/src/Main.java
index 90ad315..e0d829a 100644
--- a/test/099-vmdebug/src/Main.java
+++ b/test/099-vmdebug/src/Main.java
@@ -33,6 +33,7 @@
         }
         testMethodTracing();
         testCountInstances();
+        testGetInstances();
         testRuntimeStat();
         testRuntimeStats();
     }
@@ -249,6 +250,59 @@
         System.out.println("Array counts assignable " + Arrays.toString(counts));
     }
 
+    static class ClassD {
+        public int mask;  // Bit flag tagging the instance; testGetInstances ORs these together.
+
+        public ClassD(int mask) {
+            this.mask = mask;
+        }
+    }
+
+    static class ClassE extends ClassD {  // Subclass of ClassD that adds no fields of its own.
+        public ClassE(int mask) {
+            super(mask);
+        }
+    }
+
+    // ORs together the mask fields of the given objects, each of which must be a ClassD.
+    private static int combinedMaskOf(Object[] objects) {
+        int combined = 0;
+        for (Object o : objects) {
+            combined |= ((ClassD) o).mask;
+        }
+        return combined;
+    }
+
+    // Exercises VMDebug.getInstancesOfClasses for ClassD, ClassE and a null class entry, first
+    // with assignable == false and then with assignable == true, printing the number of
+    // instances returned per class and the OR of their masks.
+    private static void testGetInstances() throws Exception {
+        // Five instances held by the list: three ClassD and two ClassE.
+        ArrayList<Object> l = new ArrayList<Object>();
+        l.add(new ClassD(0x01));
+        l.add(new ClassE(0x02));
+        l.add(new ClassD(0x04));
+        l.add(new ClassD(0x08));
+        l.add(new ClassE(0x10));
+        Runtime.getRuntime().gc();
+        Class<?>[] classes = new Class<?>[] {ClassD.class, ClassE.class, null};
+
+        // First query: assignable == false.
+        Object[][] exact = VMDebug.getInstancesOfClasses(classes, false);
+        System.out.println(
+            "ClassD got " + exact[0].length + ", combined mask: " + combinedMaskOf(exact[0]));
+        System.out.println(
+            "ClassE got " + exact[1].length + ", combined mask: " + combinedMaskOf(exact[1]));
+        System.out.println("null got " + exact[2].length);
+
+        Object[][] assignable = VMDebug.getInstancesOfClasses(classes, true);
+        System.out.println("ClassD assignable got " + assignable[0].length
+            + ", combined mask: " + combinedMaskOf(assignable[0]));
+        System.out.println("ClassE assignable got " + assignable[1].length
+            + ", combined mask: " + combinedMaskOf(assignable[1]));
+        System.out.println("null assignable got " + assignable[2].length);
+    }
+
     private static class VMDebug {
         private static final Method startMethodTracingMethod;
         private static final Method stopMethodTracingMethod;
@@ -257,6 +311,7 @@
         private static final Method getRuntimeStatsMethod;
         private static final Method countInstancesOfClassMethod;
         private static final Method countInstancesOfClassesMethod;
+        private static final Method getInstancesOfClassesMethod;
         static {
             try {
                 Class<?> c = Class.forName("dalvik.system.VMDebug");
@@ -270,6 +325,8 @@
                         Class.class, Boolean.TYPE);
                 countInstancesOfClassesMethod = c.getDeclaredMethod("countInstancesOfClasses",
                         Class[].class, Boolean.TYPE);
+                getInstancesOfClassesMethod = c.getDeclaredMethod("getInstancesOfClasses",
+                        Class[].class, Boolean.TYPE);
             } catch (Exception e) {
                 throw new RuntimeException(e);
             }
@@ -300,5 +357,9 @@
             return (long[]) countInstancesOfClassesMethod.invoke(
                     null, new Object[]{classes, assignable});
         }
+        public static Object[][] getInstancesOfClasses(Class<?>[] classes, boolean assignable) throws Exception {
+            return (Object[][]) getInstancesOfClassesMethod.invoke(
+                    null, new Object[]{classes, assignable});  // null receiver: invoked as static.
+        }
     }
 }
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 58b33be..ef758e8 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -25,11 +25,11 @@
 
 #include "jni.h"
 
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
 #include <backtrace/Backtrace.h>
-#include "android-base/stringprintf.h"
 
 #include "base/file_utils.h"
-#include "base/logging.h"
 #include "base/macros.h"
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
@@ -104,20 +104,6 @@
 }
 #endif
 
-// Currently we have to fall back to our own loader for the boot image when it's compiled PIC
-// because its base is zero. Thus in-process unwinding through it won't work. This is a helper
-// detecting this.
-#if __linux__
-static bool IsPicImage() {
-  std::vector<gc::space::ImageSpace*> image_spaces =
-      Runtime::Current()->GetHeap()->GetBootImageSpaces();
-  CHECK(!image_spaces.empty());  // We should be running with an image.
-  const OatFile* oat_file = image_spaces[0]->GetOatFile();
-  CHECK(oat_file != nullptr);     // We should have an oat file to go with the image.
-  return oat_file->IsPic();
-}
-#endif
-
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(
     JNIEnv*,
     jobject,
@@ -125,11 +111,6 @@
     jint,
     jboolean) {
 #if __linux__
-  if (IsPicImage()) {
-    LOG(INFO) << "Image is pic, in-process unwinding check bypassed.";
-    return JNI_TRUE;
-  }
-
   // TODO: What to do on Valgrind?
 
   std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid()));
diff --git a/test/137-cfi/run b/test/137-cfi/run
index ebc729b..adea71a 100755
--- a/test/137-cfi/run
+++ b/test/137-cfi/run
@@ -16,10 +16,15 @@
 
 # Test with full DWARF debugging information.
 # Check full signatures of methods.
+# The option jitthreshold:0 ensures that if we run the test in JIT mode,
+# there will be JITed frames on the callstack (it synchronously JITs on first use).
 ${RUN} "$@" -Xcompiler-option --generate-debug-info \
+  --runtime-option -Xjitthreshold:0 \
   --args --full-signatures --args --test-local --args --test-remote
 
 # Test with minimal compressed debugging information.
 # Check only method names (parameters are omitted to save space).
 # Check only remote unwinding since decompression is disabled in local unwinds (b/27391690).
-${RUN} "$@" -Xcompiler-option --generate-mini-debug-info --args --test-remote
+${RUN} "$@" -Xcompiler-option --generate-mini-debug-info \
+  --runtime-option -Xjitthreshold:0 \
+  --args --test-remote
diff --git a/test/166-bad-interface-super/expected.txt b/test/166-bad-interface-super/expected.txt
new file mode 100644
index 0000000..c49f6d2
--- /dev/null
+++ b/test/166-bad-interface-super/expected.txt
@@ -0,0 +1,2 @@
+Caught java.lang.ClassFormatError when trying to resolve BadSuper1.
+Caught java.lang.ClassFormatError when trying to resolve BadSuper2.
diff --git a/test/166-bad-interface-super/info.txt b/test/166-bad-interface-super/info.txt
new file mode 100644
index 0000000..bcba8c0
--- /dev/null
+++ b/test/166-bad-interface-super/info.txt
@@ -0,0 +1 @@
+Test that linking an interface declaring a superclass other than j.l.Object throws CFE.
diff --git a/test/166-bad-interface-super/jasmin/BadSuper1.j b/test/166-bad-interface-super/jasmin/BadSuper1.j
new file mode 100644
index 0000000..f96564e
--- /dev/null
+++ b/test/166-bad-interface-super/jasmin/BadSuper1.j
@@ -0,0 +1,17 @@
+; Copyright (C) 2017 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+.interface               public BadSuper1
+.super                   BaseInterface
+
diff --git a/test/166-bad-interface-super/jasmin/BadSuper2.j b/test/166-bad-interface-super/jasmin/BadSuper2.j
new file mode 100644
index 0000000..584bd20
--- /dev/null
+++ b/test/166-bad-interface-super/jasmin/BadSuper2.j
@@ -0,0 +1,17 @@
+; Copyright (C) 2017 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;      http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+.interface               public BadSuper2
+.super                   BaseClass
+
diff --git a/test/166-bad-interface-super/src/BaseClass.java b/test/166-bad-interface-super/src/BaseClass.java
new file mode 100644
index 0000000..6ea1ad3
--- /dev/null
+++ b/test/166-bad-interface-super/src/BaseClass.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class BaseClass {
+}
diff --git a/test/166-bad-interface-super/src/BaseInterface.java b/test/166-bad-interface-super/src/BaseInterface.java
new file mode 100644
index 0000000..7872a43
--- /dev/null
+++ b/test/166-bad-interface-super/src/BaseInterface.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface BaseInterface {
+}
diff --git a/test/166-bad-interface-super/src/Main.java b/test/166-bad-interface-super/src/Main.java
new file mode 100644
index 0000000..3df2574
--- /dev/null
+++ b/test/166-bad-interface-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main {
+    public static void main(String[] args) throws Exception {
+        tryResolveClassExpectingCFE("BadSuper1");  // Interface whose .super is BaseInterface.
+        tryResolveClassExpectingCFE("BadSuper2");  // Interface whose .super is BaseClass.
+    }
+
+    public static void tryResolveClassExpectingCFE(String className) throws Exception {
+        try {
+            Class.forName(className);  // Nothing is printed if resolution succeeds.
+        } catch (ClassFormatError cfe) {
+            final String name = cfe.getClass().getName();
+            System.out.println("Caught " + name + " when trying to resolve " + className + ".");
+        }
+    }
+}
diff --git a/test/167-visit-locks/expected.txt b/test/167-visit-locks/expected.txt
new file mode 100644
index 0000000..5157c64
--- /dev/null
+++ b/test/167-visit-locks/expected.txt
@@ -0,0 +1,3 @@
+JNI_OnLoad called
+First
+Second
diff --git a/test/167-visit-locks/info.txt b/test/167-visit-locks/info.txt
new file mode 100644
index 0000000..d849bc3
--- /dev/null
+++ b/test/167-visit-locks/info.txt
@@ -0,0 +1 @@
+Regression test for b/68703210
diff --git a/test/706-jit-skip-compilation/run b/test/167-visit-locks/run
similarity index 71%
copy from test/706-jit-skip-compilation/run
copy to test/167-visit-locks/run
index 6c5720a..9365411 100644
--- a/test/706-jit-skip-compilation/run
+++ b/test/167-visit-locks/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Run without the app image, otherwise the verification results will be cached
-# in the ArtMethod of the image and the test will be skewed.
-exec ${RUN} "${@}" --no-app-image
+# Use a smaller heap so it's easier to potentially fill up. Quote the args to keep them intact.
+exec ${RUN} "${@}" --runtime-option -Xmx2m
diff --git a/test/167-visit-locks/smali/TestSync.smali b/test/167-visit-locks/smali/TestSync.smali
new file mode 100644
index 0000000..5e68ad7
--- /dev/null
+++ b/test/167-visit-locks/smali/TestSync.smali
@@ -0,0 +1,119 @@
+.class LTestSync;
+.super Ljava/lang/Object;
+.source "Main.java"
+
+
+# direct methods
+.method constructor <init>()V
+    .registers 1
+
+    .prologue
+    .line 6
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+
+    return-void
+.end method
+
+.method public static run()V
+    # v0-v2 were generated by javac+dx for the original src code, keeping them.
+    # v10..v19 are for tracking, aliasing and manipulating the first lock.
+    # v20..v29 are for tracking, aliasing and manipulating the second lock.
+    .registers 30
+
+    .prologue
+    .line 8
+    const-string v1, "First"
+
+    .line 9
+    const-string v2, "Second"
+
+    move-object v10, v1
+    const v1, 0x1
+
+    .line 10
+    monitor-enter v10
+
+    # Introduce a range of dead copies.
+    move-object v11, v10
+    move-object v12, v10
+    move-object v13, v10
+    move-object v14, v10
+    move-object v15, v10
+    move-object/16 v16, v10
+    move-object/16 v17, v10
+    move-object/16 v18, v10
+
+    # Introduce a copy that we'll use for unlock.
+    move-object/16 v19, v10
+
+    # Clobber the original alias.
+    const v10, 0x3
+
+    move-object/16 v20, v2
+    const v2, 0x2
+
+    .line 11
+    :try_start_b
+    monitor-enter v20
+    :try_end_c
+
+    # Introduce a range of dead copies.
+    move-object/16 v21, v20
+    move-object/16 v22, v20
+    move-object/16 v23, v20
+    move-object/16 v24, v20
+    move-object/16 v25, v20
+    move-object/16 v26, v20
+    move-object/16 v27, v20
+
+    # Introduce another copy that we will hold live.
+    move-object/16 v28, v20
+
+    # Clobber the original alias.
+    const v20, 0x5
+
+    # Introduce another copy that we'll use for unlock.
+    move-object/16 v29, v28
+
+    .catchall {:try_start_b .. :try_end_c} :catchall_15
+
+    .line 12
+    :try_start_c
+    invoke-static/range { v28 }, LMain;->run(Ljava/lang/Object;)V
+
+    .line 13
+    monitor-exit v29
+    :try_end_10
+    .catchall {:try_start_c .. :try_end_10} :catchall_12
+
+    .line 14
+    :try_start_10
+    monitor-exit v19
+    :try_end_11
+    .catchall {:try_start_10 .. :try_end_11} :catchall_15
+
+    .line 15
+    return-void
+
+    .line 13
+    :catchall_12
+    move-exception v0
+
+    :try_start_13
+    monitor-exit v29
+    :try_end_14
+    .catchall {:try_start_13 .. :try_end_14} :catchall_12
+
+    :try_start_14
+    throw v0
+
+    .line 14
+    :catchall_15
+    move-exception v0
+
+    monitor-exit v19
+    :try_end_17
+    .catchall {:try_start_14 .. :try_end_17} :catchall_15
+
+    throw v0
+.end method
diff --git a/test/167-visit-locks/src/Main.java b/test/167-visit-locks/src/Main.java
new file mode 100644
index 0000000..d8da927
--- /dev/null
+++ b/test/167-visit-locks/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+        System.loadLibrary(args[0]);  // args[0] names the native library to load.
+        java.lang.reflect.Method syncRun = Class.forName("TestSync").getMethod("run");
+        syncRun.invoke(null);  // run() is static, hence the null receiver.
+    }
+
+    public static void run(Object o) {  // Invoked by TestSync.run() in TestSync.smali.
+        testVisitLocks();
+    }
+
+    public static native void testVisitLocks();  // Java_Main_testVisitLocks in visit_locks.cc.
+}
diff --git a/test/167-visit-locks/visit_locks.cc b/test/167-visit-locks/visit_locks.cc
new file mode 100644
index 0000000..e79c880
--- /dev/null
+++ b/test/167-visit-locks/visit_locks.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni.h"
+
+#include <iostream>
+
+#include "android-base/logging.h"
+
+#include "arch/context.h"
+#include "art_method.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "mirror/object-inl.h"
+#include "mirror/string.h"
+#include "monitor.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread-current-inl.h"
+
+namespace art {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_testVisitLocks(JNIEnv*, jclass) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  class VisitLocks : public StackVisitor {
+   public:
+    VisitLocks(Thread* thread, Context* context)
+        : StackVisitor(thread, context, StackWalkKind::kIncludeInlinedFrames) {
+    }
+
+    bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+      ArtMethod* m = GetMethod();
+
+      // Ignore frames without a method as well as runtime methods.
+      if (m == nullptr || m->IsRuntimeMethod()) {
+        return true;
+      }
+
+      if (m->PrettyMethod() == "void TestSync.run()") {
+        // Interesting frame: hand it to Monitor::VisitLocks together with Callback.
+        Monitor::VisitLocks(this, Callback, nullptr);
+        return false;
+      }
+
+      return true;
+    }
+
+    static void Callback(mirror::Object* obj, void*) REQUIRES_SHARED(Locks::mutator_lock_) {
+      CHECK(obj != nullptr);
+      CHECK(obj->IsString());  // Only String objects are expected here.
+      std::cerr << obj->AsString()->ToModifiedUtf8() << std::endl;  // One line per object.
+    }
+  };
+  Context* context = Context::Create();
+  VisitLocks vl(soa.Self(), context);
+  vl.WalkStack();
+  delete context;
+}
+
+}  // namespace art
diff --git a/test/706-jit-skip-compilation/run b/test/1940-ddms-ext/check
old mode 100644
new mode 100755
similarity index 67%
copy from test/706-jit-skip-compilation/run
copy to test/1940-ddms-ext/check
index 6c5720a..d2c0384
--- a/test/706-jit-skip-compilation/run
+++ b/test/1940-ddms-ext/check
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Run without the app image, otherwise the verification results will be cached
-# in the ArtMethod of the image and the test will be skewed.
-exec ${RUN} "${@}" --no-app-image
+# Need to pull out the describeException output since that won't be there on
+# device.
+sed -e '/\t.*$/d' "$2" | sed -e '/java.lang.ArrayIndexOutOfBoundsException:.*$/d' > "$2.tmp"
+
+./default-check "$1" "$2.tmp"
diff --git a/test/1940-ddms-ext/expected.txt b/test/1940-ddms-ext/expected.txt
index 62d3b7b..1a457a0 100644
--- a/test/1940-ddms-ext/expected.txt
+++ b/test/1940-ddms-ext/expected.txt
@@ -3,8 +3,19 @@
 MyDdmHandler: Putting value 0x800025
 MyDdmHandler: Chunk returned: Chunk(Type: 0xFADE7357, Len: 8, data: [0, 0, 0, 0, 0, -128, 0, 37])
 JVMTI returned chunk: Chunk(Type: 0xFADE7357, Len: 8, data: [0, 0, 0, 0, 0, -128, 0, 37])
+Sending empty data array
+MyDdmHandler: Chunk received: Chunk(Type: 0xDEADBEEF, Len: 0, data: [])
+MyDdmHandler: Putting value 0x1
+MyDdmHandler: Chunk returned: Chunk(Type: 0xFADE7357, Len: 8, data: [0, 0, 0, 0, 0, 0, 0, 1])
+JVMTI returned chunk: Chunk(Type: 0xFADE7357, Len: 8, data: [0, 0, 0, 0, 0, 0, 0, 1])
 Sending chunk: Chunk(Type: 0xDEADBEEF, Len: 8, data: [9, 10, 11, 12, 13, 14, 15, 16])
 Chunk published: Chunk(Type: 0xDEADBEEF, Len: 8, data: [9, 10, 11, 12, 13, 14, 15, 16])
+Sending data [1] to chunk handler -1412567295
+MyDdmHandler: Chunk received: Chunk(Type: 0xABCDEF01, Len: 1, data: [1])
+JVMTI returned chunk: Chunk(Type: 0xFADE7357, Len: 0, data: [])
+Sending data [1] to chunk handler 305419896
+MyDdmHandler: Chunk received: Chunk(Type: 0x12345678, Len: 1, data: [1])
+Got error: JVMTI_ERROR_INTERNAL
 Saw expected thread events.
 Expected chunk type published: 1213221190
 Expected chunk type published: 1297109829
diff --git a/test/1940-ddms-ext/src-art/art/Test1940.java b/test/1940-ddms-ext/src-art/art/Test1940.java
index 9f79eae..226fe35 100644
--- a/test/1940-ddms-ext/src-art/art/Test1940.java
+++ b/test/1940-ddms-ext/src-art/art/Test1940.java
@@ -30,6 +30,8 @@
   public static final int DDMS_HEADER_LENGTH = 8;
   public static final int MY_DDMS_TYPE = 0xDEADBEEF;
   public static final int MY_DDMS_RESPONSE_TYPE = 0xFADE7357;
+  public static final int MY_EMPTY_DDMS_TYPE = 0xABCDEF01;
+  public static final int MY_INVALID_DDMS_TYPE = 0x12345678;
 
   public static final boolean PRINT_ALL_CHUNKS = false;
 
@@ -58,19 +60,27 @@
     public void connected() {}
     public void disconnected() {}
     public Chunk handleChunk(Chunk req) {
-      // For this test we will simply calculate the checksum
-      checkEq(req.type, MY_DDMS_TYPE);
       System.out.println("MyDdmHandler: Chunk received: " + printChunk(req));
-      ByteBuffer b = ByteBuffer.wrap(new byte[8]);
-      Adler32 a = new Adler32();
-      a.update(req.data, req.offset, req.length);
-      b.order(ByteOrder.BIG_ENDIAN);
-      long val = a.getValue();
-      b.putLong(val);
-      System.out.printf("MyDdmHandler: Putting value 0x%X\n", val);
-      Chunk ret = new Chunk(MY_DDMS_RESPONSE_TYPE, b.array(), 0, 8);
-      System.out.println("MyDdmHandler: Chunk returned: " + printChunk(ret));
-      return ret;
+      if (req.type == MY_DDMS_TYPE) {
+        // For this test we will simply calculate the checksum
+        ByteBuffer b = ByteBuffer.wrap(new byte[8]);
+        Adler32 a = new Adler32();
+        a.update(req.data, req.offset, req.length);
+        b.order(ByteOrder.BIG_ENDIAN);
+        long val = a.getValue();
+        b.putLong(val);
+        System.out.printf("MyDdmHandler: Putting value 0x%X\n", val);
+        Chunk ret = new Chunk(MY_DDMS_RESPONSE_TYPE, b.array(), 0, 8);
+        System.out.println("MyDdmHandler: Chunk returned: " + printChunk(ret));
+        return ret;
+      } else if (req.type == MY_EMPTY_DDMS_TYPE) {
+        return new Chunk(MY_DDMS_RESPONSE_TYPE, new byte[0], 0, 0);
+      } else if (req.type == MY_INVALID_DDMS_TYPE) {
+        // This is a very invalid chunk.
+        return new Chunk(MY_DDMS_RESPONSE_TYPE, new byte[] { 0 }, /*offset*/ 12, /*length*/ 55);
+      } else {
+        throw new TestError("Unknown ddm request type: " + req.type);
+      }
     }
   }
 
@@ -113,18 +123,42 @@
         Test1940.class.getDeclaredMethod("HandlePublish", Integer.TYPE, new byte[0].getClass()));
     // Test sending chunk directly.
     DdmServer.registerHandler(MY_DDMS_TYPE, SINGLE_HANDLER);
+    DdmServer.registerHandler(MY_EMPTY_DDMS_TYPE, SINGLE_HANDLER);
+    DdmServer.registerHandler(MY_INVALID_DDMS_TYPE, SINGLE_HANDLER);
     DdmServer.registrationComplete();
     byte[] data = new byte[] { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
     System.out.println("Sending data " + Arrays.toString(data));
     Chunk res = processChunk(data);
     System.out.println("JVMTI returned chunk: " + printChunk(res));
 
+    // Test sending an empty chunk.
+    System.out.println("Sending empty data array");
+    res = processChunk(new byte[0]);
+    System.out.println("JVMTI returned chunk: " + printChunk(res));
+
     // Test sending chunk through DdmServer#sendChunk
     Chunk c = new Chunk(
         MY_DDMS_TYPE, new byte[] { 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10 }, 0, 8);
     System.out.println("Sending chunk: " + printChunk(c));
     DdmServer.sendChunk(c);
 
+    // Test getting back an empty chunk.
+    data = new byte[] { 0x1 };
+    System.out.println(
+        "Sending data " + Arrays.toString(data) + " to chunk handler " + MY_EMPTY_DDMS_TYPE);
+    res = processChunk(new Chunk(MY_EMPTY_DDMS_TYPE, data, 0, 1));
+    System.out.println("JVMTI returned chunk: " + printChunk(res));
+
+    // Test getting back an invalid chunk.
+    System.out.println(
+        "Sending data " + Arrays.toString(data) + " to chunk handler " + MY_INVALID_DDMS_TYPE);
+    try {
+      res = processChunk(new Chunk(MY_INVALID_DDMS_TYPE, data, 0, 1));
+      System.out.println("JVMTI returned chunk: " + printChunk(res));
+    } catch (RuntimeException e) {
+      System.out.println("Got error: " + e.getMessage());
+    }
+
     // Test thread chunks are sent.
     final boolean[] types_seen = new boolean[] { false, false, false };
     CURRENT_HANDLER = (type, cdata) -> {
diff --git a/test/518-null-array-get/expected.txt b/test/518-null-array-get/expected.txt
index e69de29..ae5318e 100644
--- a/test/518-null-array-get/expected.txt
+++ b/test/518-null-array-get/expected.txt
@@ -0,0 +1,6 @@
+NullArrayFailInt2Object
+NullArrayFailObject2Int
+NullArraySuccessInt
+NullArraySuccessInt2Float
+NullArraySuccessShort
+NullArraySuccessRef
diff --git a/test/518-null-array-get/info.txt b/test/518-null-array-get/info.txt
index 407f590..71e0332 100644
--- a/test/518-null-array-get/info.txt
+++ b/test/518-null-array-get/info.txt
@@ -1,3 +1,9 @@
-Regression test for Quick and Optimizing that used
-to crash on an aget-object + int-to-byte sequence
-(accepted by the verifier in the case the array was null).
+Codifies that the verifier should reject type-unsafe
+instructions in dead code after aget on null, but pass
+type-safe dead code.
+
+Previously verification stopped after aget on null and
+punted the method to the interpreter in an effort to avoid
+compiler crashes. As broken code appears very uncommon,
+ensure verifier strictness and help the compilers see more
+code.
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArrayFailInt2Object.smali
similarity index 73%
copy from test/518-null-array-get/smali/NullArray.smali
copy to test/518-null-array-get/smali/NullArrayFailInt2Object.smali
index 52abc38..ca4ed10 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArrayFailInt2Object.smali
@@ -12,15 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget on null cannot be used as a reference.
+
+.class public LNullArrayFailInt2Object;
 
 .super Ljava/lang/Object;
 
-.method public static method()B
+.method public static method()V
    .registers 2
    const/4 v0, 0
    const/4 v1, 0
-   aget-object v0, v0, v1
-   int-to-byte v0, v0
-   return v0
+   aget v0, v0, v1
+   invoke-virtual { v0 }, Ljava/lang/Object;->toString()Ljava/lang/String;
+   return-void
 .end method
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArrayFailObject2Int.smali
similarity index 86%
rename from test/518-null-array-get/smali/NullArray.smali
rename to test/518-null-array-get/smali/NullArrayFailObject2Int.smali
index 52abc38..83823a2 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArrayFailObject2Int.smali
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget-object on null cannot be used as an integral.
+
+.class public LNullArrayFailObject2Int;
 
 .super Ljava/lang/Object;
 
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArraySuccessInt.smali
similarity index 69%
copy from test/518-null-array-get/smali/NullArray.smali
copy to test/518-null-array-get/smali/NullArraySuccessInt.smali
index 52abc38..01cf1c9 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArraySuccessInt.smali
@@ -12,15 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget on null can be used as an int.
+
+.class public LNullArraySuccessInt;
 
 .super Ljava/lang/Object;
 
-.method public static method()B
+.method public static intMethod(I)V
+   .registers 1
+   return-void
+.end method
+
+.method public static method()V
    .registers 2
    const/4 v0, 0
    const/4 v1, 0
-   aget-object v0, v0, v1
-   int-to-byte v0, v0
-   return v0
+   aget v0, v0, v1
+   invoke-static { v0 }, LNullArraySuccessInt;->intMethod(I)V
+   return-void
 .end method
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArraySuccessInt2Float.smali
similarity index 67%
copy from test/518-null-array-get/smali/NullArray.smali
copy to test/518-null-array-get/smali/NullArraySuccessInt2Float.smali
index 52abc38..bd59d5f 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArraySuccessInt2Float.smali
@@ -12,15 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget on null can be used as a float.
+
+.class public LNullArraySuccessInt2Float;
 
 .super Ljava/lang/Object;
 
-.method public static method()B
+.method public static floatMethod(F)V
+   .registers 1
+   return-void
+.end method
+
+.method public static method()V
    .registers 2
    const/4 v0, 0
    const/4 v1, 0
-   aget-object v0, v0, v1
-   int-to-byte v0, v0
-   return v0
+   aget v0, v0, v1
+   invoke-static { v0 }, LNullArraySuccessInt2Float;->floatMethod(F)V
+   return-void
 .end method
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArraySuccessRef.smali
similarity index 70%
copy from test/518-null-array-get/smali/NullArray.smali
copy to test/518-null-array-get/smali/NullArraySuccessRef.smali
index 52abc38..2f512d4 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArraySuccessRef.smali
@@ -12,15 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget-object on null can be used as a reference.
+
+.class public LNullArraySuccessRef;
 
 .super Ljava/lang/Object;
 
-.method public static method()B
+.method public voidMethod()V
+   .registers 1
+   return-void
+.end method
+
+.method public static method()V
    .registers 2
    const/4 v0, 0
    const/4 v1, 0
    aget-object v0, v0, v1
-   int-to-byte v0, v0
-   return v0
+   invoke-virtual { v0 }, LNullArraySuccessRef;->voidMethod()V
+   return-void
 .end method
diff --git a/test/518-null-array-get/smali/NullArray.smali b/test/518-null-array-get/smali/NullArraySuccessShort.smali
similarity index 67%
copy from test/518-null-array-get/smali/NullArray.smali
copy to test/518-null-array-get/smali/NullArraySuccessShort.smali
index 52abc38..d332e51 100644
--- a/test/518-null-array-get/smali/NullArray.smali
+++ b/test/518-null-array-get/smali/NullArraySuccessShort.smali
@@ -12,15 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-.class public LNullArray;
+# Check that the result of aget-short on null can be used as a short.
+
+.class public LNullArraySuccessShort;
 
 .super Ljava/lang/Object;
 
-.method public static method()B
+.method public static shortMethod(S)V
+   .registers 1
+   return-void
+.end method
+
+.method public static method()V
    .registers 2
    const/4 v0, 0
    const/4 v1, 0
-   aget-object v0, v0, v1
-   int-to-byte v0, v0
-   return v0
+   aget-short v0, v0, v1
+   invoke-static { v0 }, LNullArraySuccessShort;->shortMethod(S)V
+   return-void
 .end method
diff --git a/test/518-null-array-get/src/Main.java b/test/518-null-array-get/src/Main.java
index 66e50aa..678aef1 100644
--- a/test/518-null-array-get/src/Main.java
+++ b/test/518-null-array-get/src/Main.java
@@ -22,16 +22,36 @@
   class InnerClass {}
 
   public static void main(String[] args) throws Exception {
-    Class<?> c = Class.forName("NullArray");
-    Method m = c.getMethod("method");
-    Object[] arguments = { };
+    checkLoad("NullArrayFailInt2Object", true);
+    checkLoad("NullArrayFailObject2Int", true);
+    checkLoad("NullArraySuccessInt", false);
+    checkLoad("NullArraySuccessInt2Float", false);
+    checkLoad("NullArraySuccessShort", false);
+    checkLoad("NullArraySuccessRef", false);
+  }
+
+  private static void checkLoad(String className, boolean expectError) throws Exception {
+    Class<?> c;
     try {
-      m.invoke(null, arguments);
-      throw new Error("Expected an InvocationTargetException");
-    } catch (InvocationTargetException e) {
-      if (!(e.getCause() instanceof NullPointerException)) {
-        throw new Error("Expected a NullPointerException");
+      c = Class.forName(className);
+      if (expectError) {
+        throw new RuntimeException("Expected error for " + className);
       }
+      Method m = c.getMethod("method");
+      try {
+        m.invoke(null);
+        throw new RuntimeException("Expected an InvocationTargetException");
+      } catch (InvocationTargetException e) {
+        if (!(e.getCause() instanceof NullPointerException)) {
+          throw new RuntimeException("Expected a NullPointerException");
+        }
+        System.out.println(className);
+      }
+    } catch (VerifyError e) {
+      if (!expectError) {
+        throw new RuntimeException(e);
+      }
+      System.out.println(className);
     }
   }
 }
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index c4cc3b0..f6332b5 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -999,6 +999,24 @@
     return res;
   }
 
+  /// CHECK-START: void Main.testStoreSameValue() load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArrayGet
+  /// CHECK: ArraySet
+
+  /// CHECK-START: void Main.testStoreSameValue() load_store_elimination (after)
+  /// CHECK: NewArray
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-NOT: ArraySet
+  private static void testStoreSameValue() {
+    Object[] array = new Object[2];
+    sArray = array;
+    Object obj = array[0];
+    array[1] = obj;    // store the same value as the default value.
+  }
+
+  static Object[] sArray;
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
diff --git a/test/530-checker-lse3/expected.txt b/test/530-checker-lse3/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-lse3/expected.txt
diff --git a/test/530-checker-lse3/info.txt b/test/530-checker-lse3/info.txt
new file mode 100644
index 0000000..29b4cb8
--- /dev/null
+++ b/test/530-checker-lse3/info.txt
@@ -0,0 +1,4 @@
+Regression test for load store elimination not respecting the loaded type. When
+a wider value is stored in a narrower field and then loaded from that field,
+LSE needs to replace the value to be stored with a type conversion to the
+narrower type.
diff --git a/test/530-checker-lse3/smali/StoreLoad.smali b/test/530-checker-lse3/smali/StoreLoad.smali
new file mode 100644
index 0000000..7fb582c
--- /dev/null
+++ b/test/530-checker-lse3/smali/StoreLoad.smali
@@ -0,0 +1,62 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LStoreLoad;
+
+.super Ljava/lang/Object;
+
+## CHECK-START: int StoreLoad.test(int) load_store_elimination (before)
+## CHECK-DAG:     <<Arg:i\d+>>    ParameterValue
+## CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<Arg>>] field_name:StoreLoad.byteField
+## CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<Arg>>] field_name:StoreLoad.byteField2
+## CHECK-DAG:     <<Val:b\d+>>    StaticFieldGet [{{l\d+}}] field_name:StoreLoad.byteField
+## CHECK-DAG:     <<Val2:b\d+>>   StaticFieldGet [{{l\d+}}] field_name:StoreLoad.byteField2
+## CHECK-DAG:     <<Val3:i\d+>>   Add [<<Val>>,<<Val2>>]
+## CHECK-DAG:                     Return [<<Val3>>]
+
+## CHECK-START: int StoreLoad.test(int) load_store_elimination (after)
+## CHECK-NOT:                     StaticFieldGet
+
+## CHECK-START: int StoreLoad.test(int) load_store_elimination (after)
+## CHECK-DAG:     <<Arg:i\d+>>    ParameterValue
+## CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<Arg>>] field_name:StoreLoad.byteField
+## CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<Arg>>] field_name:StoreLoad.byteField2
+## CHECK-DAG:     <<Conv:b\d+>>   TypeConversion [<<Arg>>]
+## CHECK-DAG:     <<Val3:i\d+>>   Add [<<Conv>>,<<Conv>>]
+## CHECK-DAG:                     Return [<<Val3>>]
+.method public static test(I)I
+    .registers 2
+    sput-byte v1, LStoreLoad;->byteField:B
+    sput-byte v1, LStoreLoad;->byteField2:B
+    sget-byte v0, LStoreLoad;->byteField:B
+    sget-byte v1, LStoreLoad;->byteField2:B
+    add-int/2addr v0, v1
+    return v0
+.end method
+
+## CHECK-START: int StoreLoad.test2(int) load_store_elimination (before)
+## CHECK-DAG:     <<Arg:i\d+>>    ParameterValue
+## CHECK-DAG:                     StaticFieldSet [{{l\d+}},<<Arg>>] field_name:StoreLoad.byteField
+## CHECK-DAG:                     Return [<<Arg>>]
+
+## CHECK-START: int StoreLoad.test2(int) load_store_elimination (after)
+## CHECK-NOT:                     TypeConversion
+.method public static test2(I)I
+    .registers 1
+    sput-byte v0, LStoreLoad;->byteField:B
+    return v0
+.end method
+
+.field public static byteField:B
+.field public static byteField2:B
diff --git a/test/530-checker-lse3/src/Main.java b/test/530-checker-lse3/src/Main.java
new file mode 100644
index 0000000..caef0b3
--- /dev/null
+++ b/test/530-checker-lse3/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Field;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("StoreLoad");
+    Method m = c.getMethod("test", int.class);
+    int result = (Integer)m.invoke(null, 0x12345678);
+    if (result != (0x78 + 0x78)) {
+      throw new Error("Expected 240, got " + result);
+    }
+    m = c.getMethod("test2", int.class);
+    result = (Integer)m.invoke(null, 0xdeadbeef);
+    if (result != 0xdeadbeef) {
+      throw new Error("Expected 0xdeadbeef, got " + result);
+    }
+    Field f = c.getDeclaredField("byteField");
+    byte b = f.getByte(null);
+    if (b != (byte)0xef) {
+      throw new Error("Expected 0xef, got " + b);
+    }
+    f = c.getDeclaredField("byteField2");
+    b = f.getByte(null);
+    if (b != (byte)0x78) {
+      throw new Error("Expected 0xef, got " + b);
+    }
+  }
+}
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
index 61c9e88..f6f877c 100644
--- a/test/532-checker-nonnull-arrayset/src/Main.java
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -29,9 +29,7 @@
   /// CHECK-NOT:      test
   /// CHECK:          ReturnVoid
   public static void test() {
-    Object[] array = new Object[2];
-    // Storing to static to avoid some lse optimization.
-    sArray = array;
+    Object[] array = sArray;
     Object nonNull = array[0];
     nonNull.getClass(); // Ensure nonNull has an implicit null check.
     array[1] = nonNull;
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 9e6fd3d..b76efea 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -424,31 +424,19 @@
     return - (left * right);
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecAdd
 
-  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
-  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:     VecAdd                         loop:<<Loop>>      outer_loop:none
-
-  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
-  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:     VecMultiplyAccumulate kind:Add loop:<<Loop>>      outer_loop:none
-
-  /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
-  /// CHECK-NOT:     VecMul
-  /// CHECK-NOT:     VecAdd
   public static void SimdMulAdd(int[] array1, int[] array2) {
     for (int j = 0; j < 100; j++) {
       array2[j] += 12345 * array1[j];
@@ -473,31 +461,19 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT:     VecMul
   /// CHECK-NOT:     VecSub
 
-  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
-  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
-
-  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
-  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:     VecMultiplyAccumulate kind:Sub loop:<<Loop>>      outer_loop:none
-
-  /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
-  /// CHECK-NOT:     VecMul
-  /// CHECK-NOT:     VecSub
   public static void SimdMulSub(int[] array1, int[] array2) {
     for (int j = 0; j < 100; j++) {
       array2[j] -= 12345 * array1[j];
@@ -522,21 +498,14 @@
     }
   }
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
   /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
 
-  /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
   /// CHECK-NOT: VecMultiplyAccumulate
 
-  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
-  /// CHECK-DAG:     Phi                            loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:     VecMul                         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:     VecSub                         loop:<<Loop>>      outer_loop:none
-
-  /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
-  /// CHECK-NOT: VecMultiplyAccumulate
   public static void SimdMulMultipleUses(int[] array1, int[] array2) {
     for (int j = 0; j < 100; j++) {
        int temp = 12345 * array1[j];
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 55873ea..3173afd 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -44,24 +44,11 @@
   /// CHECK-START: int Main.testSimple(int) sharpening (before)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:RuntimeCall
 
-  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: int Main.testSimple(int) sharpening (after)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
-  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-MIPS64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (before)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
@@ -74,31 +61,14 @@
 
   /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:RuntimeCall
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:RuntimeCall
 
-  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
   /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
-  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-MIPS64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (before)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-
-  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
@@ -169,30 +139,7 @@
     return x;
   }
 
-  /// CHECK-START: java.lang.String Main.$noinline$getBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:RuntimeCall
-
-  /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
-
-  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
-
-  /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
-
-  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
-
-  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
-
-  /// CHECK-START-MIPS64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: java.lang.String Main.$noinline$getBootImageString() builder (after)
   // Note: load kind depends on PIC/non-PIC
   /// CHECK:                LoadString load_kind:{{BootImageAddress|BootImageInternTable}}
 
@@ -203,31 +150,16 @@
     return "";
   }
 
-  /// CHECK-START: java.lang.String Main.$noinline$getNonBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:RuntimeCall
-
-  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: java.lang.String Main.$noinline$getNonBootImageString() builder (after)
   /// CHECK:                LoadString load_kind:BssEntry
 
+  /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
   /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_x86 (after)
   /// CHECK-DAG:            X86ComputeBaseMethodAddress
   /// CHECK-DAG:            LoadString load_kind:BssEntry
 
-  /// CHECK-START-X86_64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
-  /// CHECK:                LoadString load_kind:BssEntry
-
-  /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
-  /// CHECK:                LoadString load_kind:BssEntry
-
-  /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
-  /// CHECK:                LoadString load_kind:BssEntry
-
-  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
-  /// CHECK:                LoadString load_kind:BssEntry
-
-  /// CHECK-START-MIPS64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
-  /// CHECK:                LoadString load_kind:BssEntry
-
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
@@ -235,27 +167,7 @@
     return "non-boot-image-string";
   }
 
-  /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
-
-  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
-
-  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
-
-  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
-
-  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
-  // Note: load kind depends on PIC/non-PIC
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
-
-  /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: java.lang.Class Main.$noinline$getStringClass() builder (after)
   // Note: load kind depends on PIC/non-PIC
   /// CHECK:                LoadClass load_kind:{{BootImageAddress|BootImageClassTable}} class_name:java.lang.String
 
@@ -266,28 +178,16 @@
     return String.class;
   }
 
-  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
+  /// CHECK-START-{ARM,ARM64,MIPS,MIPS64,X86,X86_64}: java.lang.Class Main.$noinline$getOtherClass() builder (after)
   /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
 
+  /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getOtherClass() pc_relative_fixups_x86 (after)
   /// CHECK-DAG:            X86ComputeBaseMethodAddress
   /// CHECK-DAG:            LoadClass load_kind:BssEntry class_name:Other
 
-  /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
-
-  /// CHECK-START-ARM: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
-
-  /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
-
-  /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
-
-  /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after)
-  /// CHECK:                LoadClass load_kind:BssEntry class_name:Other
-
   public static Class<?> $noinline$getOtherClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 3ef8fe6..29f3817 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -493,6 +493,95 @@
     }
   }
 
+  // Avoid bad scheduler-SIMD interaction. Returns 9 * 17 = 153 if each a[i] reads back as 1.
+  static int doNotMoveSIMD() {
+    int sum = 0;
+    for (int j = 0; j <= 8; j++) {
+      int[] a = new int[17];    // Fresh array: every a[i] == 0.
+                                // A ConstructorFence may follow the allocation (unconfirmed).
+      for (int i = 0; i < a.length; i++) {
+        a[i] += 1;              // Now a[i] == 1.
+      }
+      for (int i = 0; i < a.length; i++) {
+        sum += a[i];            // Must still observe a[i] == 1.
+      }
+    }
+    return sum;
+  }
+
+  // Ensure spilling saves full SIMD values: 32 running sums over a, b, c, d for i in [1, 100).
+  private static final int reduction32Values(int[] a, int[] b, int[] c, int[] d) {
+    int s0 = 0;
+    int s1 = 0;
+    int s2 = 0;
+    int s3 = 0;
+    int s4 = 0;
+    int s5 = 0;
+    int s6 = 0;
+    int s7 = 0;
+    int s8 = 0;
+    int s9 = 0;
+    int s10 = 0;
+    int s11 = 0;
+    int s12 = 0;
+    int s13 = 0;
+    int s14 = 0;
+    int s15 = 0;
+    int s16 = 0;
+    int s17 = 0;
+    int s18 = 0;
+    int s19 = 0;
+    int s20 = 0;
+    int s21 = 0;
+    int s22 = 0;
+    int s23 = 0;
+    int s24 = 0;
+    int s25 = 0;
+    int s26 = 0;
+    int s27 = 0;
+    int s28 = 0;
+    int s29 = 0;
+    int s30 = 0;
+    int s31 = 0;
+    for (int i = 1; i < 100; i++) {
+      s0 += a[i];
+      s1 += b[i];
+      s2 += c[i];
+      s3 += d[i];
+      s4 += a[i];
+      s5 += b[i];
+      s6 += c[i];
+      s7 += d[i];
+      s8 += a[i];
+      s9 += b[i];
+      s10 += c[i];
+      s11 += d[i];
+      s12 += a[i];
+      s13 += b[i];
+      s14 += c[i];
+      s15 += d[i];
+      s16 += a[i];
+      s17 += b[i];
+      s18 += c[i];
+      s19 += d[i];
+      s20 += a[i];
+      s21 += b[i];
+      s22 += c[i];
+      s23 += d[i];
+      s24 += a[i];
+      s25 += b[i];
+      s26 += c[i];
+      s27 += d[i];
+      s28 += a[i];
+      s29 += b[i];
+      s30 += c[i];
+      s31 += d[i];
+    }
+    return s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13 + s14 + s15 +
+           s16 + s17 + s18 + s19 + s20 + s21 + s22 + s23 +
+           s24 + s25 + s26 + s27 + s28 + s29 + s30 + s31;
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -655,6 +744,22 @@
       expectEquals((byte)((short) cx[i] + 1), b1[i]);
     }
 
+    expectEquals(153, doNotMoveSIMD());
+
+    {
+      int[] a1 = new int[100];
+      int[] a2 = new int[100];
+      int[] a3 = new int[100];
+      int[] a4 = new int[100];
+      for (int i = 0; i < 100; i++) {
+        a1[i] = i;
+        a2[i] = 1;
+        a3[i] = 100 - i;
+        a4[i] = i % 16;
+      }
+      expectEquals(85800, reduction32Values(a1, a2, a3, a4));
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/706-jit-skip-compilation/expected.txt b/test/638-checker-inline-cache-intrinsic/expected.txt
similarity index 100%
copy from test/706-jit-skip-compilation/expected.txt
copy to test/638-checker-inline-cache-intrinsic/expected.txt
diff --git a/test/638-checker-inline-cache-intrinsic/info.txt b/test/638-checker-inline-cache-intrinsic/info.txt
new file mode 100644
index 0000000..764577b
--- /dev/null
+++ b/test/638-checker-inline-cache-intrinsic/info.txt
@@ -0,0 +1 @@
+Verify the devirtualization of a method that should be intrinsified.
diff --git a/test/706-jit-skip-compilation/run b/test/638-checker-inline-cache-intrinsic/run
similarity index 71%
rename from test/706-jit-skip-compilation/run
rename to test/638-checker-inline-cache-intrinsic/run
index 6c5720a..f43681d 100644
--- a/test/706-jit-skip-compilation/run
+++ b/test/638-checker-inline-cache-intrinsic/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Run without the app image, otherwise the verification results will be cached
-# in the ArtMethod of the image and the test will be skewed.
-exec ${RUN} "${@}" --no-app-image
+exec ${RUN} --jit --runtime-option -Xjitthreshold:100 -Xcompiler-option --verbose-methods=inlineMonomorphic,knownReceiverType,stringEquals "${@}"
diff --git a/test/638-checker-inline-cache-intrinsic/src/Main.java b/test/638-checker-inline-cache-intrinsic/src/Main.java
new file mode 100644
index 0000000..472cbf6
--- /dev/null
+++ b/test/638-checker-inline-cache-intrinsic/src/Main.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: char Main.$noinline$inlineMonomorphic(java.lang.CharSequence) inliner (before)
+  /// CHECK:       InvokeInterface method_name:java.lang.CharSequence.charAt
+
+  /// CHECK-START: char Main.$noinline$inlineMonomorphic(java.lang.CharSequence) inliner (after)
+  /// CHECK:       Deoptimize
+  /// CHECK:       InvokeVirtual method_name:java.lang.String.charAt intrinsic:StringCharAt
+
+  /// CHECK-START: char Main.$noinline$inlineMonomorphic(java.lang.CharSequence) instruction_simplifier$after_inlining (after)
+  /// CHECK:       Deoptimize
+  /// CHECK-NOT:   InvokeInterface
+  /// CHECK-NOT:   InvokeVirtual
+
+  public static char $noinline$inlineMonomorphic(CharSequence cs) {
+    return cs.charAt(0);
+  }
+
+  /// CHECK-START: char Main.$noinline$knownReceiverType() inliner (before)
+  /// CHECK:       InvokeInterface method_name:java.lang.CharSequence.charAt
+
+  /// CHECK-START: char Main.$noinline$knownReceiverType() inliner (after)
+  /// CHECK:       InvokeVirtual method_name:java.lang.String.charAt intrinsic:StringCharAt
+
+  /// CHECK-START: char Main.$noinline$knownReceiverType() instruction_simplifier$after_inlining (after)
+  /// CHECK-NOT:   InvokeInterface
+  /// CHECK-NOT:   InvokeVirtual
+
+  public static char $noinline$knownReceiverType() {
+    CharSequence cs = "abc";
+    return cs.charAt(1);
+  }
+
+  /// CHECK-START: boolean Main.$noinline$stringEquals(java.lang.Object) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:java.lang.Object.equals intrinsic:None
+
+  /// CHECK-START: boolean Main.$noinline$stringEquals(java.lang.Object) inliner (after)
+  /// CHECK:       Deoptimize
+  /// CHECK:       InvokeVirtual method_name:java.lang.Object.equals intrinsic:StringEquals
+
+  /// CHECK-START: boolean Main.$noinline$stringEquals(java.lang.Object) instruction_simplifier$after_inlining (after)
+  /// CHECK:       Deoptimize
+  /// CHECK:       InvokeVirtual method_name:java.lang.Object.equals intrinsic:StringEquals
+
+  public static boolean $noinline$stringEquals(Object obj) {
+    return obj.equals("def");
+  }
+
+  public static void test() {
+    // Warm up inline cache: both loops pass the String str before the ensureJitCompiled calls.
+    for (int i = 0; i < 45; i++) {
+      $noinline$inlineMonomorphic(str);
+    }
+    for (int i = 0; i < 60; i++) {
+      $noinline$stringEquals(str);
+    }
+    ensureJitCompiled(Main.class, "$noinline$stringEquals");
+    ensureJitCompiled(Main.class, "$noinline$inlineMonomorphic");
+    ensureJitCompiled(Main.class, "$noinline$knownReceiverType");
+    if ($noinline$inlineMonomorphic(str) != 'x') {
+      throw new Error("Expected x");
+    }
+    if ($noinline$knownReceiverType() != 'b') {
+      throw new Error("Expected b");
+    }
+    if ($noinline$stringEquals("abc")) {
+      throw new Error("Expected false");
+    }
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    test();
+  }
+
+  static String str = "xyz";  // Argument for the warm-up loops and the 'x' check in test().
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+}
diff --git a/test/640-checker-boolean-simd/src/Main.java b/test/640-checker-boolean-simd/src/Main.java
index 347f916..7d98e68 100644
--- a/test/640-checker-boolean-simd/src/Main.java
+++ b/test/640-checker-boolean-simd/src/Main.java
@@ -29,17 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.and(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.and(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.and(boolean) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.and(boolean) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -52,17 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.or(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.or(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.or(boolean) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.or(boolean) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -75,17 +55,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.xor(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.xor(boolean) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.xor(boolean) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.xor(boolean) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -98,17 +68,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 5c13fc3..6b69127 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -29,17 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.add(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -52,17 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sub(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -75,17 +55,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.mul(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -111,17 +81,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -134,17 +94,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -157,17 +107,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shl4() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -180,17 +120,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sar2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
index b3dff14..317a666 100644
--- a/test/640-checker-char-simd/src/Main.java
+++ b/test/640-checker-char-simd/src/Main.java
@@ -29,17 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.add(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -52,17 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sub(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -75,17 +55,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.mul(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -99,6 +69,7 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.div(int) loop_optimization (after)
+  /// CHECK-NOT: VecDiv
   //
   //  Not supported on any architecture.
   //
@@ -111,17 +82,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -134,17 +95,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -157,17 +108,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shl4() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -192,17 +133,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shr2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shr2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
index 5d08998..0f04f73 100644
--- a/test/640-checker-double-simd/src/Main.java
+++ b/test/640-checker-double-simd/src/Main.java
@@ -30,12 +30,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.add(double) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(double) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.add(double) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -48,12 +43,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sub(double) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(double) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.sub(double) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -66,12 +56,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.mul(double) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(double) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.mul(double) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -84,12 +69,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.div(double) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.div(double) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.div(double) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -102,12 +82,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -120,12 +95,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.abs() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -138,11 +108,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after)
-  /// CHECK-NOT: VecLoad
-  /// CHECK-NOT: VecStore
-  //
-  /// CHECK-START-MIPS64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-START: void Main.conv(long[]) loop_optimization (after)
   /// CHECK-NOT: VecLoad
   /// CHECK-NOT: VecStore
   //
diff --git a/test/640-checker-float-simd/src/Main.java b/test/640-checker-float-simd/src/Main.java
index c7883f3..d4eef9f 100644
--- a/test/640-checker-float-simd/src/Main.java
+++ b/test/640-checker-float-simd/src/Main.java
@@ -30,12 +30,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.add(float) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(float) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.add(float) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -48,12 +43,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sub(float) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(float) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.sub(float) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -66,12 +56,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.mul(float) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(float) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.mul(float) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -84,12 +69,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.div(float) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.div(float) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.div(float) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -102,12 +82,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -120,12 +95,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.abs() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -138,12 +108,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.conv(int[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.conv(int[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.conv(int[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index aa230bf..85d8b1b 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -29,17 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.add(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -52,17 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sub(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -75,17 +55,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.mul(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -112,17 +82,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -135,17 +95,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -158,17 +108,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shl4() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -181,17 +121,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sar2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -204,17 +134,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shr2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shr2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -242,15 +162,7 @@
   /// CHECK-DAG: <<Get:i\d+>> ArrayGet                             loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:              ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shr32() loop_optimization (after)
-  /// CHECK-DAG: <<Get:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:              VecStore [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shr32() loop_optimization (after)
-  /// CHECK-DAG: <<Get:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:              VecStore [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr32() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shr32() loop_optimization (after)
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr32() {
@@ -271,19 +183,7 @@
   /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shr33() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shr33() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr33() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shr33() loop_optimization (after)
   /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
@@ -305,19 +205,7 @@
   /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shrMinus254() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shrMinus254() loop_optimization (after)
   /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index c754f2a..bb4d0cb 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -29,12 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.add(long) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(long) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.add(long) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -47,12 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sub(long) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(long) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.sub(long) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -65,14 +55,15 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
+  //  Not directly supported for longs.
+  //
+  /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after)
+  /// CHECK-NOT: VecMul
+  //
   /// CHECK-START-MIPS64: void Main.mul(long) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  //  Not supported for longs.
-  /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after)
-  /// CHECK-NOT: VecMul
   static void mul(long x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -96,12 +87,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -114,12 +100,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -132,12 +113,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.shl4() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -150,12 +126,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.sar2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -168,12 +139,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.shr2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -201,11 +167,7 @@
   /// CHECK-DAG: <<Get:j\d+>> ArrayGet                             loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:              ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr64() loop_optimization (after)
-  /// CHECK-DAG: <<Get:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:              VecStore [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr64() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.shr64() loop_optimization (after)
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr64() {
@@ -226,13 +188,7 @@
   /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr65() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shr65() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.shr65() loop_optimization (after)
   /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
@@ -254,13 +210,7 @@
   /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>]               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after)
-  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.shrMinus254() loop_optimization (after)
   /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]            loop:<<Loop>>      outer_loop:none
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
index e187397..2b4ba87 100644
--- a/test/640-checker-short-simd/src/Main.java
+++ b/test/640-checker-short-simd/src/Main.java
@@ -29,17 +29,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.add(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -52,17 +42,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sub(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -75,17 +55,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.mul(int) loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -99,6 +69,7 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.div(int) loop_optimization (after)
+  /// CHECK-NOT: VecDiv
   //
   //  Not supported on any architecture.
   //
@@ -111,17 +82,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.neg() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -134,17 +95,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.not() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -157,17 +108,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.shl4() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
@@ -180,17 +121,7 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.sar2() loop_optimization (after)
   /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
diff --git a/test/642-fp-callees/fp_callees.cc b/test/642-fp-callees/fp_callees.cc
index 600f969..17bb55b 100644
--- a/test/642-fp-callees/fp_callees.cc
+++ b/test/642-fp-callees/fp_callees.cc
@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
+#include <android-base/logging.h>
+
 #include "base/casts.h"
-#include "base/logging.h"
 #include "jni.h"
 
 namespace art {
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index 823908c..d498bda 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -28,7 +28,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitByte(byte[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
@@ -38,25 +38,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-ARM64: void Main.doitByte(byte[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
-  //
-  /// CHECK-START-MIPS64: void Main.doitByte(byte[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitByte(byte[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = (byte) Math.abs(x[i]);
@@ -84,7 +65,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitShort(short[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
@@ -94,25 +75,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-ARM64: void Main.doitShort(short[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
-  //
-  /// CHECK-START-MIPS64: void Main.doitShort(short[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitShort(short[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = (short) Math.abs(x[i]);
@@ -147,7 +109,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitInt(int[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitInt(int[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
@@ -157,25 +119,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-ARM64: void Main.doitInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
-  //
-  /// CHECK-START-MIPS64: void Main.doitInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                   loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitInt(int[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -188,7 +131,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                   loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitLong(long[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                    loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
@@ -198,15 +141,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-MIPS64: void Main.doitLong(long[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                    loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitLong(long[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -219,7 +153,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsFloat loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                    loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitFloat(float[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitFloat(float[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                     loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                      loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                    loop:<<Loop1>>      outer_loop:none
@@ -229,15 +163,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-MIPS64: void Main.doitFloat(float[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                     loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                      loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                    loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsFloat loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                    loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitFloat(float[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -250,7 +175,7 @@
   /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet                                     loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitDouble(double[]) loop_optimization (after)
   /// CHECK-DAG: VecLoad                                      loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
@@ -260,15 +185,6 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-MIPS64: void Main.doitDouble(double[]) loop_optimization (after)
-  /// CHECK-DAG: VecLoad                                      loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitDouble(double[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
diff --git a/test/646-checker-hadd-alt-byte/src/Main.java b/test/646-checker-hadd-alt-byte/src/Main.java
index 41aa40c..2ef340a 100644
--- a/test/646-checker-hadd-alt-byte/src/Main.java
+++ b/test/646-checker-hadd-alt-byte/src/Main.java
@@ -39,19 +39,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
@@ -86,19 +74,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
@@ -121,19 +97,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
@@ -170,19 +134,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
@@ -204,21 +156,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
@@ -252,21 +190,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/646-checker-hadd-alt-char/src/Main.java b/test/646-checker-hadd-alt-char/src/Main.java
index 8f879c7..2a1382d 100644
--- a/test/646-checker-hadd-alt-char/src/Main.java
+++ b/test/646-checker-hadd-alt-char/src/Main.java
@@ -39,19 +39,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
@@ -87,19 +75,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
@@ -125,19 +101,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
@@ -174,19 +138,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
@@ -211,21 +163,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
@@ -259,21 +197,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/646-checker-hadd-alt-short/src/Main.java b/test/646-checker-hadd-alt-short/src/Main.java
index b591081..4035b97 100644
--- a/test/646-checker-hadd-alt-short/src/Main.java
+++ b/test/646-checker-hadd-alt-short/src/Main.java
@@ -39,19 +39,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
@@ -86,19 +74,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
@@ -121,19 +97,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
@@ -170,19 +134,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
@@ -204,21 +156,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
@@ -252,21 +190,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<UShr>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/646-checker-hadd-byte/src/Main.java b/test/646-checker-hadd-byte/src/Main.java
index 4d259c4..ca22200 100644
--- a/test/646-checker-hadd-byte/src/Main.java
+++ b/test/646-checker-hadd-byte/src/Main.java
@@ -36,19 +36,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
@@ -83,19 +71,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
@@ -118,19 +94,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int8 rounded:true loop:<<Loop>> outer_loop:none
@@ -167,19 +131,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  packed_type:Uint8 rounded:true loop:<<Loop>> outer_loop:none
@@ -201,21 +153,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                       loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
@@ -249,21 +187,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint8 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                       loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index 55bb958..85c2fca 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -36,19 +36,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
@@ -74,19 +62,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
@@ -122,19 +98,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
@@ -157,19 +121,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
@@ -192,19 +144,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
@@ -231,19 +171,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Int16 rounded:true loop:<<Loop>> outer_loop:none
@@ -281,19 +209,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
@@ -330,19 +246,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                               loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] packed_type:Uint16 rounded:true loop:<<Loop>> outer_loop:none
@@ -365,21 +269,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Int16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
@@ -413,21 +303,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Shr>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] packed_type:Uint16 rounded:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<HAdd>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]         loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                               loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index 2188346..45949ae 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -27,19 +27,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
@@ -70,19 +58,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
@@ -102,19 +78,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
@@ -145,19 +109,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>      outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>      outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint8 loop:<<Loop>> outer_loop:none
@@ -177,14 +129,7 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>  outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Int8 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>       outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>  outer_loop:none
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index d92bdaf..9b05609 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -27,19 +27,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
@@ -59,19 +47,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
@@ -91,14 +67,7 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitMin100(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>         outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin100(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitMin100(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
index 598106e..66343ad 100644
--- a/test/651-checker-int-simd-minmax/src/Main.java
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -26,19 +26,7 @@
   /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
@@ -57,19 +45,7 @@
   /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index 91f2a2d..5f10ada 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -27,19 +27,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>         outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>         outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
@@ -70,19 +58,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>          outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>          outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
@@ -102,19 +78,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>         outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>         outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>    outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>         outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
@@ -145,19 +109,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>          outer_loop:none
-  //
-  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
-  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
-  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Max>>] loop:<<Loop>>          outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM,ARM64,MIPS64}: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop:B\d+>>     outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>          outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] packed_type:Uint16 loop:<<Loop>> outer_loop:none
@@ -177,14 +129,7 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.doitMin100(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
-  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>   outer_loop:none
-  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get>>,<<Repl>>] packed_type:Int16 loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},{{i\d+}},<<Min>>] loop:<<Loop>>        outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.doitMin100(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.doitMin100(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<I100:i\d+>> IntConstant 100                      loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>]        loop:none
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop:B\d+>>   outer_loop:none
diff --git a/test/655-jit-clinit/src/Main.java b/test/655-jit-clinit/src/Main.java
index 44b3154..2fb8f2a 100644
--- a/test/655-jit-clinit/src/Main.java
+++ b/test/655-jit-clinit/src/Main.java
@@ -23,7 +23,7 @@
     Foo.hotMethod();
   }
 
-  public native static boolean isJitCompiled(Class<?> cls, String methodName);
+  public native static boolean hasJitCompiledEntrypoint(Class<?> cls, String methodName);
   private native static boolean hasJit();
 }
 
@@ -36,7 +36,7 @@
 
   static {
     array = new Object[10000];
-    while (!Main.isJitCompiled(Foo.class, "hotMethod")) {
+    while (!Main.hasJitCompiledEntrypoint(Foo.class, "hotMethod")) {
       Foo.hotMethod();
       try {
         // Sleep to give a chance for the JIT to compile `hotMethod`.
diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java
index 31d28e8..081e421 100644
--- a/test/656-checker-simd-opt/src/Main.java
+++ b/test/656-checker-simd-opt/src/Main.java
@@ -102,20 +102,7 @@
   /// CHECK-DAG: <<Add2>>       Add [<<Phi2>>,<<Get>>]     loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Add1>>       Add [<<Phi1>>,<<L1>>]      loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.longInductionReduction(long[]) loop_optimization (after)
-  /// CHECK-DAG: <<L0:j\d+>>    LongConstant 0               loop:none
-  /// CHECK-DAG: <<L1:j\d+>>    LongConstant 1               loop:none
-  /// CHECK-DAG: <<L2:j\d+>>    LongConstant 2               loop:none
-  /// CHECK-DAG: <<I0:i\d+>>    IntConstant 0                loop:none
-  /// CHECK-DAG: <<Get:j\d+>>   ArrayGet [{{l\d+}},<<I0>>]   loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>   VecReplicateScalar [<<Get>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>   VecSetScalars [<<L1>>]       loop:none
-  /// CHECK-DAG: <<Phi1:j\d+>>  Phi [<<L0>>,{{j\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>  Phi [<<Set>>,{{d\d+}}]       loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                VecAdd [<<Phi2>>,<<Rep>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                Add [<<Phi1>>,<<L2>>]        loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.longInductionReduction(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.longInductionReduction(long[]) loop_optimization (after)
   /// CHECK-DAG: <<L0:j\d+>>    LongConstant 0               loop:none
   /// CHECK-DAG: <<L1:j\d+>>    LongConstant 1               loop:none
   /// CHECK-DAG: <<L2:j\d+>>    LongConstant 2               loop:none
@@ -144,18 +131,7 @@
   /// CHECK-DAG:                ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Add>>        Add [<<Phi>>,<<I1>>]                loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
-  /// CHECK-DAG: <<I0:i\d+>>    IntConstant 0                       loop:none
-  /// CHECK-DAG: <<I1:i\d+>>    IntConstant 1                       loop:none
-  /// CHECK-DAG: <<I4:i\d+>>    IntConstant 4                       loop:none
-  /// CHECK-DAG: <<Get:j\d+>>   ArrayGet [{{l\d+}},<<I0>>]          loop:none
-  /// CHECK-DAG: <<Cnv:i\d+>>   TypeConversion [<<Get>>]            loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>   VecReplicateScalar [<<Cnv>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<I0>>,{{i\d+}}]               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:                VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                Add [<<Phi>>,<<I4>>]                loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
   /// CHECK-DAG: <<I0:i\d+>>    IntConstant 0                       loop:none
   /// CHECK-DAG: <<I1:i\d+>>    IntConstant 1                       loop:none
   /// CHECK-DAG: <<I4:i\d+>>    IntConstant 4                       loop:none
@@ -183,19 +159,7 @@
   /// CHECK-DAG:                ArraySet [{{l\d+}},<<Phi>>,<<Cnv2>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Add>>        Add [<<Phi>>,<<I1>>]                 loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<I0:i\d+>>    IntConstant 0                       loop:none
-  /// CHECK-DAG: <<I4:i\d+>>    IntConstant 4                       loop:none
-  /// CHECK-DAG: <<L1:j\d+>>    LongConstant 1                      loop:none
-  /// CHECK-DAG: <<Cnv:i\d+>>   TypeConversion [<<L1>>]             loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>   VecReplicateScalar [<<Cnv>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>   Phi [<<I0>>,{{i\d+}}]               loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>  VecLoad [{{l\d+}},<<Phi>>]          loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Add:d\d+>>   VecAdd [<<Load>>,<<Rep>>]           loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                Add [<<Phi>>,<<I4>>]                loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
   /// CHECK-DAG: <<I0:i\d+>>    IntConstant 0                       loop:none
   /// CHECK-DAG: <<I4:i\d+>>    IntConstant 4                       loop:none
   /// CHECK-DAG: <<L1:j\d+>>    LongConstant 1                      loop:none
diff --git a/test/660-checker-simd-sad-byte/src/Main.java b/test/660-checker-simd-sad-byte/src/Main.java
index 877d718..594948b 100644
--- a/test/660-checker-simd-sad-byte/src/Main.java
+++ b/test/660-checker-simd-sad-byte/src/Main.java
@@ -99,18 +99,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons16>>]      loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -141,18 +130,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons16>>]      loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -185,18 +163,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons16>>]      loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -234,19 +201,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons16>>]      loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -283,19 +238,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons16>>]      loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16                 loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
diff --git a/test/660-checker-simd-sad-int/src/Main.java b/test/660-checker-simd-sad-int/src/Main.java
index d7d5a95..aa8431c 100644
--- a/test/660-checker-simd-sad-int/src/Main.java
+++ b/test/660-checker-simd-sad-int/src/Main.java
@@ -31,32 +31,14 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                              loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}                        loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]                      loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-ARM64: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                              loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]                      loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                              loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]                      loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]                       loop:<<Loop>> outer_loop:none
   private static int sadInt2Int(int[] x, int[] y) {
     int min_length = Math.min(x.length, y.length);
     int sad = 0;
@@ -106,32 +88,14 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                              loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}                        loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]                      loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-ARM64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                              loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]                      loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                              loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]                   loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]                     loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Ld1:d\d+>>    VecLoad [{{l\d+}},<<I:i\d+>>]              loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Ld2:d\d+>>    VecLoad [{{l\d+}},<<I>>]                   loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi>>,<<Ld1>>,<<Ld2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]                      loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]                       loop:<<Loop>> outer_loop:none
   private static int sadInt2IntAlt2(int[] x, int[] y) {
     int min_length = Math.min(x.length, y.length);
     int sad = 0;
@@ -160,19 +124,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons4>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -209,19 +161,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons4>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
diff --git a/test/660-checker-simd-sad-long/src/Main.java b/test/660-checker-simd-sad-long/src/Main.java
index d080e0c..8281812 100644
--- a/test/660-checker-simd-sad-long/src/Main.java
+++ b/test/660-checker-simd-sad-long/src/Main.java
@@ -32,19 +32,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons2>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -106,19 +94,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons2>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -155,19 +131,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons2>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java
index 4ab6682..16bcaba 100644
--- a/test/660-checker-simd-sad-short/src/Main.java
+++ b/test/660-checker-simd-sad-short/src/Main.java
@@ -66,18 +66,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -108,18 +97,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -152,18 +130,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -201,19 +168,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -250,19 +205,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
diff --git a/test/660-checker-simd-sad-short2/src/Main.java b/test/660-checker-simd-sad-short2/src/Main.java
index 331f5ce..274892d 100644
--- a/test/660-checker-simd-sad-short2/src/Main.java
+++ b/test/660-checker-simd-sad-short2/src/Main.java
@@ -84,18 +84,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadCastedChar2Int(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadCastedChar2Int(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadCastedChar2Int(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -145,18 +134,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadCastedChar2IntAlt(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadCastedChar2IntAlt(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadCastedChar2IntAlt(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -208,18 +186,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadCastedChar2IntAlt2(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadCastedChar2IntAlt2(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadCastedChar2IntAlt2(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
@@ -276,19 +243,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadCastedChar2Long(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadCastedChar2Long(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadCastedChar2Long(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 0                 loop:none
@@ -344,19 +299,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: long Main.sadCastedChar2LongAt1(char[], char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<ConsL>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.sadCastedChar2LongAt1(char[], char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.sadCastedChar2LongAt1(char[], char[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsL:j\d+>>  LongConstant 1                 loop:none
diff --git a/test/660-checker-simd-sad-short3/src/Main.java b/test/660-checker-simd-sad-short3/src/Main.java
index ecda884..5016b65 100644
--- a/test/660-checker-simd-sad-short3/src/Main.java
+++ b/test/660-checker-simd-sad-short3/src/Main.java
@@ -33,19 +33,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Param>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntParamRight(short[], short) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
@@ -76,19 +64,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Param>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntParamLeft(short[], short) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Param:s\d+>>  ParameterValue                 loop:none
@@ -119,19 +95,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntConstRight(short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
@@ -162,19 +126,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntConstLeft(short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 32767              loop:none
@@ -205,19 +157,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Conv>>]  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
-  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntInvariantRight(short[], int) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
@@ -249,7 +189,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
@@ -259,17 +199,7 @@
   /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntInvariantLeft(short[], int) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<Conv:s\d+>>   TypeConversion [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<Conv>>]  loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Rep>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
   private static int sadShort2IntInvariantLeft(short[] s, int val) {
     int sad = 0;
     short x = (short) (val + 1);
@@ -293,7 +223,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
@@ -304,18 +234,7 @@
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntCastedExprRight(short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Load>>,<<Add>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
   private static int sadShort2IntCastedExprRight(short[] s) {
     int sad = 0;
     for (int i = 0; i < s.length; i++) {
@@ -339,7 +258,7 @@
   /// CHECK-DAG:                 Add [<<Phi2>>,<<Intrin>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]       loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
   /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
   /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
@@ -350,18 +269,7 @@
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.sadShort2IntCastedExprLeft(short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons0:i\d+>>  IntConstant 0                  loop:none
-  /// CHECK-DAG: <<Cons8:i\d+>>  IntConstant 8                  loop:none
-  /// CHECK-DAG: <<ConsI:i\d+>>  IntConstant 110                loop:none
-  /// CHECK-DAG: <<Rep:d\d+>>    VecReplicateScalar [<<ConsI>>] loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [<<Cons0>>]      loop:none
-  /// CHECK-DAG: <<Phi1:i\d+>>   Phi [<<Cons0>>,{{i\d+}}]       loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set>>,{{d\d+}}]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<Phi1>>]    loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Add:d\d+>>    VecAdd [<<Load>>,<<Rep>>]      loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<SAD:d\d+>>    VecSADAccumulate [<<Phi2>>,<<Add>>,<<Load>>] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons8>>]       loop:<<Loop>>      outer_loop:none
   private static int sadShort2IntCastedExprLeft(short[] s) {
     int sad = 0;
     for (int i = 0; i < s.length; i++) {
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 1add0f1..3a0a049 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -62,33 +62,13 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM: int Main.reductionInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionInt(int[] x) {
@@ -116,58 +96,19 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-ARM: int Main.reductionIntChain() loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                  loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionIntChain() loop_optimization (after)
   /// CHECK-DAG: <<Set1:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
   /// CHECK-DAG: <<Phi1:d\d+>>   Phi [<<Set1>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<I1:i\d+>>] loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG:                 VecAdd [<<Phi1>>,<<Load1>>]    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I1>>,<<Cons2>>]         loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I1>>,{{i\d+}}]          loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: <<Red1:d\d+>>   VecReduce [<<Phi1>>]           loop:none
   /// CHECK-DAG: <<Extr1:i\d+>>  VecExtractScalar [<<Red1>>]    loop:none
   /// CHECK-DAG: <<Set2:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
   /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set2>>,{{d\d+}}]        loop:<<Loop2:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<I2:i\d+>>] loop:<<Loop2>>      outer_loop:none
   /// CHECK-DAG:                 VecAdd [<<Phi2>>,<<Load2>>]    loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I2>>,<<Cons2>>]         loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: <<Red2:d\d+>>   VecReduce [<<Phi2>>]           loop:none
-  /// CHECK-DAG: <<Extr2:i\d+>>  VecExtractScalar [<<Red2>>]    loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
-  //
-  /// CHECK-START-ARM64: int Main.reductionIntChain() loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
-  /// CHECK-DAG: <<Set1:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
-  /// CHECK-DAG: <<Phi1:d\d+>>   Phi [<<Set1>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<I1:i\d+>>] loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi1>>,<<Load1>>]    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I1>>,<<Cons4>>]         loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: <<Red1:d\d+>>   VecReduce [<<Phi1>>]           loop:none
-  /// CHECK-DAG: <<Extr1:i\d+>>  VecExtractScalar [<<Red1>>]    loop:none
-  /// CHECK-DAG: <<Set2:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set2>>,{{d\d+}}]        loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<I2:i\d+>>] loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi2>>,<<Load2>>]    loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I2>>,<<Cons4>>]         loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG: <<Red2:d\d+>>   VecReduce [<<Phi2>>]           loop:none
-  /// CHECK-DAG: <<Extr2:i\d+>>  VecExtractScalar [<<Red2>>]    loop:none
-  //
-  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
-  //
-  /// CHECK-START-MIPS64: int Main.reductionIntChain() loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                  loop:none
-  /// CHECK-DAG: <<Set1:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
-  /// CHECK-DAG: <<Phi1:d\d+>>   Phi [<<Set1>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load1:d\d+>>  VecLoad [{{l\d+}},<<I1:i\d+>>] loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi1>>,<<Load1>>]    loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I1>>,<<Cons4>>]         loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: <<Red1:d\d+>>   VecReduce [<<Phi1>>]           loop:none
-  /// CHECK-DAG: <<Extr1:i\d+>>  VecExtractScalar [<<Red1>>]    loop:none
-  /// CHECK-DAG: <<Set2:d\d+>>   VecSetScalars [{{i\d+}}]       loop:none
-  /// CHECK-DAG: <<Phi2:d\d+>>   Phi [<<Set2>>,{{d\d+}}]        loop:<<Loop2:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load2:d\d+>>  VecLoad [{{l\d+}},<<I2:i\d+>>] loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi2>>,<<Load2>>]    loop:<<Loop2>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I2>>,<<Cons4>>]         loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I2>>,{{i\d+}}]          loop:<<Loop2>>      outer_loop:none
   /// CHECK-DAG: <<Red2:d\d+>>   VecReduce [<<Phi2>>]           loop:none
   /// CHECK-DAG: <<Extr2:i\d+>>  VecExtractScalar [<<Red2>>]    loop:none
   //
@@ -199,38 +140,18 @@
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
-  /// CHECK-START-ARM: int Main.reductionIntToLoop(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionIntToLoop(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionIntToLoop(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionIntToLoop(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop1:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop1>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop1>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionIntToLoop(int[] x) {
     int r = 0;
-    for (int i = 0; i < 4; i++) {
+    for (int i = 0; i < 8; i++) {
       r += x[i];
     }
     for (int i = r; i < 16; i++) {
@@ -250,17 +171,7 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM64: long Main.reductionLong(long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:j\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.reductionLong(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.reductionLong(long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
@@ -312,33 +223,13 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM: int Main.reductionIntM1(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionIntM1(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionIntM1(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionIntM1(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionIntM1(int[] x) {
@@ -360,17 +251,7 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM64: long Main.reductionLongM1(long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecAdd [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:j\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.reductionLongM1(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.reductionLongM1(long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
@@ -421,33 +302,13 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM: int Main.reductionMinusInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionMinusInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 VecSub [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionMinusInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecSub [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionMinusInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{i\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecSub [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionMinusInt(int[] x) {
@@ -469,17 +330,7 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM64: long Main.reductionMinusLong(long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecSub [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:j\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: long Main.reductionMinusLong(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: long Main.reductionMinusLong(long[]) loop_optimization (after)
   /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecSetScalars [{{j\d+}}]      loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
@@ -531,33 +382,13 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM: int Main.reductionMinInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionMinInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 VecMin [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionMinInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecMin [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionMinInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecMin [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionMinInt(int[] x) {
@@ -611,33 +442,13 @@
   /// CHECK-DAG:                 Add [<<Phi1>>,<<Cons1>>]      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 Return [<<Phi2>>]             loop:none
   //
-  /// CHECK-START-ARM: int Main.reductionMaxInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons2:i\d+>>  IntConstant 2                 loop:none
+  /// CHECK-START-{ARM,ARM64,MIPS64}: int Main.reductionMaxInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Cons:i\d+>>   IntConstant {{2|4}}           loop:none
   /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
   /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:                 VecMax [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons2>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-ARM64: int Main.reductionMaxInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecMax [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
-  /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
-  //
-  /// CHECK-START-MIPS64: int Main.reductionMaxInt(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Cons4:i\d+>>  IntConstant 4                 loop:none
-  /// CHECK-DAG: <<Set:d\d+>>    VecReplicateScalar [{{i\d+}}] loop:none
-  /// CHECK-DAG: <<Phi:d\d+>>    Phi [<<Set>>,{{d\d+}}]        loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG: <<Load:d\d+>>   VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 VecMax [<<Phi>>,<<Load>>]     loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG:                 Add [<<I>>,<<Cons4>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:                 Add [<<I>>,<<Cons>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Red:d\d+>>    VecReduce [<<Phi>>]           loop:none
   /// CHECK-DAG: <<Extr:i\d+>>   VecExtractScalar [<<Red>>]    loop:none
   private static int reductionMaxInt(int[] x) {
@@ -743,9 +554,9 @@
     }
 
     // Test various reductions in loops.
-    int[] x0 = { 0, 0, 0, 0 };
-    int[] x1 = { 0, 0, 0, 1 };
-    int[] x2 = { 1, 1, 1, 1 };
+    int[] x0 = { 0, 0, 0, 0, 0, 0, 0, 0 };
+    int[] x1 = { 0, 0, 0, 1, 0, 0, 0, 0 };
+    int[] x2 = { 1, 1, 1, 1, 0, 0, 0, 0 };
     expectEquals(-74, reductionByte(xb));
     expectEquals(-27466, reductionShort(xs));
     expectEquals(38070, reductionChar(xc));
@@ -754,7 +565,7 @@
     expectEquals(120, reductionIntToLoop(x0));
     expectEquals(121, reductionIntToLoop(x1));
     expectEquals(118, reductionIntToLoop(x2));
-    expectEquals(-1205, reductionIntToLoop(xi));
+    expectEquals(-1310, reductionIntToLoop(xi));
     expectEquals(365750L, reductionLong(xl));
     expectEquals(-75, reductionByteM1(xb));
     expectEquals(-27467, reductionShortM1(xs));
diff --git a/test/665-checker-simd-zero/src/Main.java b/test/665-checker-simd-zero/src/Main.java
index 6cd6d64..5c581c4 100644
--- a/test/665-checker-simd-zero/src/Main.java
+++ b/test/665-checker-simd-zero/src/Main.java
@@ -24,13 +24,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zeroz(boolean[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zeroz(boolean[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zeroz(boolean[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -46,13 +40,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zerob(byte[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zerob(byte[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zerob(byte[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -68,13 +56,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zeroc(char[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zeroc(char[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zeroc(char[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -90,13 +72,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zeros(short[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zeros(short[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zeros(short[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -112,13 +88,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zeroi(int[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zeroi(int[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zeroi(int[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0                        loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -134,13 +104,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zerol(long[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0                       loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zerol(long[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zerol(long[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0                       loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -156,13 +120,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zerof(float[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0                      loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zerof(float[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zerof(float[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0                      loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
@@ -178,13 +136,7 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Zero>>] loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.zerod(double[]) loop_optimization (after)
-  /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0                     loop:none
-  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
-  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
-  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
-  //
-  /// CHECK-START-MIPS64: void Main.zerod(double[]) loop_optimization (after)
+  /// CHECK-START-{ARM64,MIPS64}: void Main.zerod(double[]) loop_optimization (after)
   /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0                     loop:none
   /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>]        loop:none
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/706-jit-skip-compilation/expected.txt b/test/667-jit-jni-stub/expected.txt
similarity index 100%
rename from test/706-jit-skip-compilation/expected.txt
rename to test/667-jit-jni-stub/expected.txt
diff --git a/test/667-jit-jni-stub/info.txt b/test/667-jit-jni-stub/info.txt
new file mode 100644
index 0000000..6f25c44
--- /dev/null
+++ b/test/667-jit-jni-stub/info.txt
@@ -0,0 +1 @@
+Tests for JITting and collecting JNI stubs.
diff --git a/test/667-jit-jni-stub/jit_jni_stub_test.cc b/test/667-jit-jni-stub/jit_jni_stub_test.cc
new file mode 100644
index 0000000..82e06fc
--- /dev/null
+++ b/test/667-jit-jni-stub/jit_jni_stub_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <jni.h>
+
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "mirror/class.h"
+#include "mirror/string.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art {
+
+// Test-only accessor; it is a friend of JitCodeCache, which lets it reach private members.
+class JitJniStubTestHelper {
+ public:
+  static bool isNextJitGcFull(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
+    CHECK(Runtime::Current()->GetJit() != nullptr);  // The query needs a JIT-enabled runtime.
+    jit::JitCodeCache* const code_cache = Runtime::Current()->GetJit()->GetCodeCache();
+    MutexLock cache_guard(self, code_cache->lock_);  // Held while the cache is queried.
+    return code_cache->ShouldDoFullCollection();
+  }
+};
+
+// JNI entry point: calls the static method of `klass` named `methodName`, signature "()V".
+extern "C" JNIEXPORT
+void Java_Main_callThrough(JNIEnv* env, jclass, jclass klass, jstring methodName) {
+  ScopedObjectAccess soa(Thread::Current());  // Used below to Decode<> the JNI references.
+  std::string name = soa.Decode<mirror::String>(methodName)->ToModifiedUtf8();
+  jmethodID method = env->GetStaticMethodID(klass, name.c_str(), "()V");  // Checked below.
+  CHECK(method != nullptr) << soa.Decode<mirror::Class>(klass)->PrettyDescriptor() << "." << name;
+  env->CallStaticVoidMethod(klass, method);
+}
+
+// JNI entry point: runs a garbage collection of the JIT code cache on the calling thread.
+extern "C" JNIEXPORT void Java_Main_jitGc(JNIEnv*, jclass) {
+  CHECK(Runtime::Current()->GetJit() != nullptr);
+  Thread* const self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  Runtime::Current()->GetJit()->GetCodeCache()->GarbageCollectCache(self);
+}
+
+extern "C" JNIEXPORT jboolean Java_Main_isNextJitGcFull(JNIEnv*, jclass) {
+  Thread* const self = Thread::Current();
+  ScopedObjectAccess soa(self);  // Kept alive across the helper call below.
+  return JitJniStubTestHelper::isNextJitGcFull(self);
+}
+
+}  // namespace art
diff --git a/test/706-jit-skip-compilation/run b/test/667-jit-jni-stub/run
old mode 100644
new mode 100755
similarity index 70%
copy from test/706-jit-skip-compilation/run
copy to test/667-jit-jni-stub/run
index 6c5720a..f235c6b
--- a/test/706-jit-skip-compilation/run
+++ b/test/667-jit-jni-stub/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (C) 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,6 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Run without the app image, otherwise the verification results will be cached
-# in the ArtMethod of the image and the test will be skewed.
-exec ${RUN} "${@}" --no-app-image
+# Disable AOT compilation of JNI stubs.
+# Ensure this test is not subject to unexpected code collection.
+${RUN} "${@}" --no-prebuild --no-dex2oat --runtime-option -Xjitinitialsize:32M
diff --git a/test/667-jit-jni-stub/src/Main.java b/test/667-jit-jni-stub/src/Main.java
new file mode 100644
index 0000000..794308d
--- /dev/null
+++ b/test/667-jit-jni-stub/src/Main.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    if (isAotCompiled(Main.class, "hasJit")) {
+      throw new Error("This test must be run with --no-prebuild --no-dex2oat!");
+    }
+    if (!hasJit()) {
+      return;
+    }
+
+    testCompilationUseAndCollection();
+    testMixedFramesOnStack();
+  }
+
+  public static void testCompilationUseAndCollection() {
+    // Test that callThrough() can be JIT-compiled.
+    assertFalse(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertFalse(hasJitCompiledCode(Main.class, "callThrough"));
+    ensureCompiledCallThroughEntrypoint(/* call */ true);
+    assertTrue(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+
+    // Use callThrough() once again now that the method has a JIT-compiled stub.
+    callThrough(Main.class, "doNothing");
+
+    // Test that GC with the JIT-compiled stub on the stack does not collect it.
+    // Also tests stack walk over the JIT-compiled stub.
+    callThrough(Main.class, "testGcWithCallThroughStubOnStack");
+
+    // Test that, when marking used methods before a full JIT GC, a single execution
+    // of the GenericJNI trampoline can save the compiled stub from being collected.
+    testSingleInvocationTriggersRecompilation();
+
+    // Test that the JNI compiled stub can actually be collected.
+    testStubCanBeCollected();
+  }
+
+  public static void testGcWithCallThroughStubOnStack() {
+    // Check that this method was called via JIT-compiled callThrough() stub.
+    assertTrue(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    // This assertion also exercises stack walk over the JIT-compiled callThrough() stub.
+    assertTrue(new Throwable().getStackTrace()[1].getMethodName().equals("callThrough"));
+
+    doJitGcsUntilFullJitGcIsScheduled();
+    // The callThrough() on the stack above this method is using the compiled stub,
+    // so the JIT GC should not remove the compiled code.
+    jitGc();
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+  }
+
+  public static void testSingleInvocationTriggersRecompilation() {
+    // After scheduling a full JIT GC, single call through the GenericJNI
+    // trampoline should ensure that the compiled stub is used again.
+    doJitGcsUntilFullJitGcIsScheduled();
+    callThrough(Main.class, "doNothing");
+    ensureCompiledCallThroughEntrypoint(/* call */ false);  // Wait for the compilation task to run.
+    assertTrue(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    jitGc();  // This JIT GC should not collect the callThrough() stub.
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+  }
+
+  public static void testMixedFramesOnStack() {
+    // Starts without a compiled JNI stub for callThrough().
+    assertFalse(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertFalse(hasJitCompiledCode(Main.class, "callThrough"));
+    callThrough(Main.class, "testMixedFramesOnStackStage2");
+    // We have just returned through the JIT-compiled JNI stub, so it must still
+    // be compiled (though not necessarily with the entrypoint pointing to it).
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+    // Though the callThrough() is on the stack, that frame is using the GenericJNI
+    // and does not prevent the collection of the JNI stub.
+    testStubCanBeCollected();
+  }
+
+  public static void testMixedFramesOnStackStage2() {
+    // We cannot assert that callThrough() has no JIT compiled stub as that check
+    // may race against the compilation task. Just check the caller.
+    assertTrue(new Throwable().getStackTrace()[1].getMethodName().equals("callThrough"));
+    // Now ensure that the JNI stub is compiled and used.
+    ensureCompiledCallThroughEntrypoint(/* call */ true);
+    callThrough(Main.class, "testMixedFramesOnStackStage3");
+  }
+
+  public static void testMixedFramesOnStackStage3() {
+    // Check that this method was called via JIT-compiled callThrough() stub.
+    assertTrue(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    // This assertion also exercises stack walk over the JIT-compiled callThrough() stub.
+    assertTrue(new Throwable().getStackTrace()[1].getMethodName().equals("callThrough"));
+    // For a good measure, try a JIT GC.
+    jitGc();
+  }
+
+  public static void testStubCanBeCollected() {
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+    doJitGcsUntilFullJitGcIsScheduled();
+    assertFalse(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+    jitGc();  // JIT GC without callThrough() on the stack should collect the callThrough() stub.
+    assertFalse(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertFalse(hasJitCompiledCode(Main.class, "callThrough"));
+  }
+
+  public static void doJitGcsUntilFullJitGcIsScheduled() {
+    // We enter with a compiled stub for callThrough() but we also need the entrypoint to be set.
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+    ensureCompiledCallThroughEntrypoint(/* call */ true);
+    // Perform JIT GC until the next GC is marked to do full collection.
+    do {
+      assertTrue(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+      callThrough(Main.class, "jitGc");  // JIT GC with callThrough() safely on the stack.
+    } while (!isNextJitGcFull());
+    // The JIT GC before the full collection resets entrypoints and waits to see
+    // if the methods are still in use.
+    assertFalse(hasJitCompiledEntrypoint(Main.class, "callThrough"));
+    assertTrue(hasJitCompiledCode(Main.class, "callThrough"));
+  }
+
+  public static void ensureCompiledCallThroughEntrypoint(boolean call) {
+    // Number of iterations over which the call count doubles, up to 1 << 12 calls.
+    final int rampUpCutOff = 12;
+    for (int count = 0; !hasJitCompiledEntrypoint(Main.class, "callThrough"); ) {
+      // If `call` is true, also exercise the `callThrough()` method to increase hotness.
+      if (call) {
+        for (int i = 1 << Math.min(count, rampUpCutOff); i > 0; --i) {
+          callThrough(Main.class, "doNothing");
+        }
+      }
+      try {
+        // Give the JIT a chance to compile the `callThrough` stub; once the ramp-up
+        // phase is over, wait even longer.
+        Thread.sleep(count < rampUpCutOff ? 100 : 200);
+      } catch (Exception e) {
+        // Ignore
+      }
+      if (++count == 50) {
+        throw new Error("TIMEOUT");
+      }
+    }
+  }
+
+  public static void assertTrue(boolean value) {
+    if (!value) {
+      throw new AssertionError("Expected true!");
+    }
+  }
+
+  public static void assertFalse(boolean value) {
+    if (value) {
+      throw new AssertionError("Expected false!");
+    }
+  }
+
+  public static void doNothing() { }
+  public static void throwError() { throw new Error(); }
+
+  // Note that the callThrough()'s shorty differs from shorties of the other
+  // native methods used in this test because of its return type, `void`.
+  public native static void callThrough(Class<?> cls, String methodName);
+
+  public native static void jitGc();
+  public native static boolean isNextJitGcFull();
+
+  public native static boolean isAotCompiled(Class<?> cls, String methodName);
+  public native static boolean hasJitCompiledEntrypoint(Class<?> cls, String methodName);
+  public native static boolean hasJitCompiledCode(Class<?> cls, String methodName);
+  private native static boolean hasJit();
+}
diff --git a/test/671-npe-field-opts/expected.txt b/test/671-npe-field-opts/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/671-npe-field-opts/expected.txt
diff --git a/test/671-npe-field-opts/info.txt b/test/671-npe-field-opts/info.txt
new file mode 100644
index 0000000..f1e5846
--- /dev/null
+++ b/test/671-npe-field-opts/info.txt
@@ -0,0 +1,3 @@
+Regression test for the compiler, which used to
+reorder or remove field accesses in a way that would confuse the runtime
+when validating an NPE.
diff --git a/test/671-npe-field-opts/src/Main.java b/test/671-npe-field-opts/src/Main.java
new file mode 100644
index 0000000..a5e81ce
--- /dev/null
+++ b/test/671-npe-field-opts/src/Main.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  static Main obj;
+  // Make 'doCheck' volatile to prevent optimizations
+  // in $noinline$bar like LICM that could hoist the null check
+  // out of the loop.
+  static volatile boolean doCheck = true;
+
+  float floatField;
+  int intField;
+
+  public static void main(String[] args) {
+    try {
+      $noinline$bar();
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      check(e, 29, 52, "$noinline$bar");
+    }
+
+    try {
+      $noinline$foo();
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+      check(e, 36, 44, "$noinline$foo");
+    }
+  }
+
+  public static float $noinline$foo() {
+    int v1 = obj.intField;
+    float v2 = obj.floatField;
+    return v2;
+  }
+
+  public static float $noinline$bar() {
+    float a = 0;
+    while (doCheck) {
+      float f = obj.floatField;
+      int i = obj.intField;
+      a = (float)i + f;
+    }
+    return a;
+  }
+
+  static void check(NullPointerException npe, int mainLine, int methodLine, String methodName) {
+    StackTraceElement[] trace = npe.getStackTrace();
+    checkElement(trace[0], "Main", methodName, "Main.java", methodLine);
+    checkElement(trace[1], "Main", "main", "Main.java", mainLine);
+  }
+
+  static void checkElement(StackTraceElement element,
+                           String declaringClass, String methodName,
+                           String fileName, int lineNumber) {
+    assertEquals(declaringClass, element.getClassName());
+    assertEquals(methodName, element.getMethodName());
+    assertEquals(fileName, element.getFileName());
+    assertEquals(lineNumber, element.getLineNumber());
+  }
+
+  static void assertEquals(Object expected, Object actual) {
+    if (!expected.equals(actual)) {
+      String msg = "Expected \"" + expected + "\" but got \"" + actual + "\"";
+      throw new AssertionError(msg);
+    }
+  }
+
+  static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+}
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index d21596d..25e4fad 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -523,7 +523,71 @@
     return res;
   }
 
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static final int ARRAY_SIZE = 32;
+
+  // Check that VecReplicateScalar is not reordered.
+  /// CHECK-START-ARM64: void Main.testVecReplicateScalar() scheduler (before)
+  /// CHECK:     Phi                loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK:     NewArray           loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecReplicateScalar loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.testVecReplicateScalar() scheduler (after)
+  /// CHECK:     Phi                loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK:     NewArray           loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecReplicateScalar loop:<<Loop>>      outer_loop:none
+  private static void testVecReplicateScalar() {
+    for (int j = 0; j <= 8; j++) {
+      int[] a = new int[ARRAY_SIZE];
+      for (int i = 0; i < a.length; i++) {
+        a[i] += 1;
+      }
+      for (int i = 0; i < a.length; i++) {
+        expectEquals(1, a[i]);
+      }
+    }
+  }
+
+  // Check that VecSetScalars, VecReduce, VecExtractScalar are not reordered.
+  /// CHECK-START-ARM64: void Main.testVecSetScalars() scheduler (before)
+  /// CHECK:     Phi                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK:     NewArray             loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecSetScalars        loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK:     VecReduce            loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecExtractScalar     loop:<<Loop>>      outer_loop:none
+  /// CHECK:     InvokeStaticOrDirect loop:<<Loop>>      outer_loop:none
+  /// CHECK:     InvokeStaticOrDirect loop:<<Loop>>      outer_loop:none
+
+  /// CHECK-START-ARM64: void Main.testVecSetScalars() scheduler (after)
+  /// CHECK:     Phi                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK:     NewArray             loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecSetScalars        loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK:     VecReduce            loop:<<Loop>>      outer_loop:none
+  /// CHECK:     VecExtractScalar     loop:<<Loop>>      outer_loop:none
+  /// CHECK:     InvokeStaticOrDirect loop:<<Loop>>      outer_loop:none
+  /// CHECK:     InvokeStaticOrDirect loop:<<Loop>>      outer_loop:none
+  private static void testVecSetScalars() {
+    for (int j = 0; j <= 8; j++) {
+      int[] a = new int[ARRAY_SIZE];  // Freshly allocated, so every element is zero.
+      int s = 5;
+      for (int i = 0; i < ARRAY_SIZE; i++) {
+        s += a[i];
+      }
+      expectEquals(0, a[0]);  // expectEquals() takes (expected, result).
+      expectEquals(5, s);
+    }
+  }
+
   public static void main(String[] args) {
+    testVecSetScalars();
+    testVecReplicateScalar();
     if ((arrayAccess() + intDiv(10)) != -35) {
       System.out.println("FAIL");
     }
diff --git a/test/706-jit-skip-compilation/info.txt b/test/706-jit-skip-compilation/info.txt
deleted file mode 100644
index e9ef86b..0000000
--- a/test/706-jit-skip-compilation/info.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Regression test for the JIT crashing when compiling a method with invalid
-dead dex code. For not compilable methods we don't gather samples and we don't
-trigger JIT compilation. However kAccDontBotherCompile is not persisted in the
-oat file and so we may end up compiling a method which we shouldn't.
diff --git a/test/706-jit-skip-compilation/smali/errclass.smali b/test/706-jit-skip-compilation/smali/errclass.smali
deleted file mode 100644
index 410504c..0000000
--- a/test/706-jit-skip-compilation/smali/errclass.smali
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (C) 2016 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-.class public LErrClass;
-
-.super Ljava/lang/Object;
-
-.method public static errMethod()J
-   .registers 8
-   const/4 v0, 0x0
-   const/4 v3, 0x0
-   aget v1, v0, v3  # v0 is null, this will alays throw and the invalid code
-                    # below will not be verified.
-   move v3, v4
-   move-wide/from16 v6, v2  # should trigger a verification error if verified as
-                            # v3 is a single register but used as a pair here.
-   return v6
-.end method
-
-# Add a field to work around demerger bug b/18051191.
-#   Failure to verify dex file '...': Offset(552) should be zero when size is zero for field-ids.
-.field private a:I
diff --git a/test/706-jit-skip-compilation/src/Main.java b/test/706-jit-skip-compilation/src/Main.java
deleted file mode 100644
index aa84724..0000000
--- a/test/706-jit-skip-compilation/src/Main.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-
-public class Main {
-  public static void main(String[] args) throws Exception {
-    System.loadLibrary(args[0]);
-    Class<?> c = Class.forName("ErrClass");
-    Method m = c.getMethod("errMethod");
-
-    // Print the counter before invokes. The golden file expects this to be 0.
-    int hotnessCounter = getHotnessCounter(c, "errMethod");
-    if (hotnessCounter != 0) {
-      throw new RuntimeException("Unexpected hotnessCounter: " + hotnessCounter);
-    }
-
-    // Loop enough to make sure the interpreter reports invocations count.
-    long result = 0;
-    for (int i = 0; i < 10000; i++) {
-      try {
-        result += (Long)m.invoke(null);
-        hotnessCounter = getHotnessCounter(c, "errMethod");
-        if (hotnessCounter != 0) {
-          throw new RuntimeException(
-            "Unexpected hotnessCounter: " + hotnessCounter);
-        }
-
-      } catch (InvocationTargetException e) {
-        if (!(e.getCause() instanceof NullPointerException)) {
-          throw e;
-        }
-      }
-    }
-
-    // Not compilable methods should not increase their hotness counter.
-    if (hotnessCounter != 0) {
-      throw new RuntimeException("Unexpected hotnessCounter: " + hotnessCounter);
-    }
-  }
-
-  public static native int getHotnessCounter(Class cls, String method_name);
-}
diff --git a/test/711-checker-type-conversion/expected.txt b/test/711-checker-type-conversion/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/711-checker-type-conversion/expected.txt
diff --git a/test/711-checker-type-conversion/info.txt b/test/711-checker-type-conversion/info.txt
new file mode 100644
index 0000000..5b63572
--- /dev/null
+++ b/test/711-checker-type-conversion/info.txt
@@ -0,0 +1 @@
+Tests for type conversion elimination.
diff --git a/test/711-checker-type-conversion/src/Main.java b/test/711-checker-type-conversion/src/Main.java
new file mode 100644
index 0000000..ae58200
--- /dev/null
+++ b/test/711-checker-type-conversion/src/Main.java
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      // Values are cast to int to display numeric values instead of
+      // (UTF-16 encoded) characters.
+      throw new Error("Expected: " + (int)expected + ", found: " + (int)result);
+    }
+  }
+
+  /// CHECK-START: byte Main.getByte1() constant_folding (before)
+  /// CHECK: TypeConversion
+  /// CHECK: TypeConversion
+  /// CHECK: Add
+  /// CHECK: TypeConversion
+
+  /// CHECK-START: byte Main.getByte1() constant_folding (after)
+  /// CHECK-NOT: TypeConversion
+  /// CHECK-NOT: Add
+
+  static byte getByte1() {
+    int i = -2;
+    int j = -3;
+    return (byte)((byte)i + (byte)j);
+  }
+
+  /// CHECK-START: byte Main.getByte2() constant_folding (before)
+  /// CHECK: TypeConversion
+  /// CHECK: TypeConversion
+  /// CHECK: Add
+  /// CHECK: TypeConversion
+
+  /// CHECK-START: byte Main.getByte2() constant_folding (after)
+  /// CHECK-NOT: TypeConversion
+  /// CHECK-NOT: Add
+
+  static byte getByte2() {
+    int i = -100;
+    int j = -101;
+    return (byte)((byte)i + (byte)j);
+  }
+
+  /// CHECK-START: byte Main.getByte3() constant_folding (before)
+  /// CHECK: TypeConversion
+  /// CHECK: TypeConversion
+  /// CHECK: Add
+  /// CHECK: TypeConversion
+
+  /// CHECK-START: byte Main.getByte3() constant_folding (after)
+  /// CHECK-NOT: TypeConversion
+  /// CHECK-NOT: Add
+
+  static byte getByte3() {
+    long i = 0xabcdabcdabcdL;
+    return (byte)((byte)i + (byte)i);
+  }
+
+  static byte byteVal = -1;
+  static short shortVal = -1;
+  static char charVal = 0xffff;
+  static int intVal = -1;
+
+  static byte[] byteArr = { 0 };
+  static short[] shortArr = { 0 };
+  static char[] charArr = { 0 };
+  static int[] intArr = { 0 };
+
+  static byte $noinline$getByte() {
+    return byteVal;
+  }
+
+  static short $noinline$getShort() {
+    return shortVal;
+  }
+
+  static char $noinline$getChar() {
+    return charVal;
+  }
+
+  static int $noinline$getInt() {
+    return intVal;
+  }
+
+  static boolean sFlag = true;
+
+  /// CHECK-START: void Main.byteToShort() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void byteToShort() {
+    shortArr[0] = 0;
+    if (sFlag) {
+      shortArr[0] = $noinline$getByte();
+    }
+  }
+
+  /// CHECK-START: void Main.byteToChar() instruction_simplifier$before_codegen (after)
+  /// CHECK: TypeConversion
+  private static void byteToChar() {
+    charArr[0] = 0;
+    if (sFlag) {
+      charArr[0] = (char)$noinline$getByte();
+    }
+  }
+
+  /// CHECK-START: void Main.byteToInt() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void byteToInt() {
+    intArr[0] = 0;
+    if (sFlag) {
+      intArr[0] = $noinline$getByte();
+    }
+  }
+
+  /// CHECK-START: void Main.charToByte() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void charToByte() {
+    byteArr[0] = 0;
+    if (sFlag) {
+      byteArr[0] = (byte)$noinline$getChar();
+    }
+  }
+
+  /// CHECK-START: void Main.charToShort() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void charToShort() {
+    shortArr[0] = 0;
+    if (sFlag) {
+      shortArr[0] = (short)$noinline$getChar();
+    }
+  }
+
+  /// CHECK-START: void Main.charToInt() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void charToInt() {
+    intArr[0] = 0;
+    if (sFlag) {
+      intArr[0] = $noinline$getChar();
+    }
+  }
+
+  /// CHECK-START: void Main.shortToByte() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void shortToByte() {
+    byteArr[0] = 0;
+    if (sFlag) {
+      byteArr[0] = (byte)$noinline$getShort();
+    }
+  }
+
+  /// CHECK-START: void Main.shortToChar() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void shortToChar() {
+    charArr[0] = 0;
+    if (sFlag) {
+      charArr[0] = (char)$noinline$getShort();
+    }
+  }
+
+  /// CHECK-START: void Main.shortToInt() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void shortToInt() {
+    intArr[0] = 0;
+    if (sFlag) {
+      intArr[0] = $noinline$getShort();
+    }
+  }
+
+  /// CHECK-START: void Main.intToByte() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void intToByte() {
+    byteArr[0] = 0;
+    if (sFlag) {
+      byteArr[0] = (byte)$noinline$getInt();
+    }
+  }
+
+  /// CHECK-START: void Main.intToShort() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void intToShort() {
+    shortArr[0] = 0;
+    if (sFlag) {
+      shortArr[0] = (short)$noinline$getInt();
+    }
+  }
+
+  /// CHECK-START: void Main.intToChar() instruction_simplifier$before_codegen (after)
+  /// CHECK-NOT: TypeConversion
+  private static void intToChar() {
+    charArr[0] = 0;
+    if (sFlag) {
+      charArr[0] = (char)$noinline$getInt();
+    }
+  }
+
+  public static void main(String[] args) {
+    assertByteEquals((byte)-5, getByte1());  // Helpers take (expected, result).
+    assertByteEquals((byte)(-201), getByte2());
+    assertByteEquals((byte)(0xcd + 0xcd), getByte3());
+
+    byteToShort();
+    assertShortEquals((short)-1, shortArr[0]);
+    byteToChar();
+    assertCharEquals((char)-1, charArr[0]);
+    byteToInt();
+    assertIntEquals(-1, intArr[0]);
+    charToByte();
+    assertByteEquals((byte)-1, byteArr[0]);
+    charToShort();
+    assertShortEquals((short)-1, shortArr[0]);
+    charToInt();
+    assertIntEquals(0xffff, intArr[0]);
+    shortToByte();
+    assertByteEquals((byte)-1, byteArr[0]);
+    shortToChar();
+    assertCharEquals((char)-1, charArr[0]);
+    shortToInt();
+    assertIntEquals(-1, intArr[0]);
+    intToByte();
+    assertByteEquals((byte)-1, byteArr[0]);
+    intToShort();
+    assertShortEquals((short)-1, shortArr[0]);
+    intToChar();
+    assertCharEquals((char)-1, charArr[0]);
+  }
+}
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
index 19312b4..cab0abf 100644
--- a/test/900-hello-plugin/load_unload.cc
+++ b/test/900-hello-plugin/load_unload.cc
@@ -17,9 +17,10 @@
 #include <jni.h>
 #include <stdio.h>
 
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+
 #include "art_method-inl.h"
-#include "base/logging.h"
-#include "base/macros.h"
 #include "java_vm_ext.h"
 #include "runtime.h"
 
diff --git a/test/936-search-onload/search_onload.cc b/test/936-search-onload/search_onload.cc
index 90d87e0..23cea83 100644
--- a/test/936-search-onload/search_onload.cc
+++ b/test/936-search-onload/search_onload.cc
@@ -18,8 +18,9 @@
 
 #include <inttypes.h>
 
-#include "android-base/stringprintf.h"
-#include "base/logging.h"
+#include <android-base/macros.h>
+#include <android-base/stringprintf.h>
+
 #include "base/macros.h"
 #include "jni.h"
 #include "jvmti.h"
diff --git a/test/983-source-transform-verify/source_transform.cc b/test/983-source-transform-verify/source_transform.cc
index ca3f88b..3010d7a 100644
--- a/test/983-source-transform-verify/source_transform.cc
+++ b/test/983-source-transform-verify/source_transform.cc
@@ -25,7 +25,6 @@
 #include "jni.h"
 #include "jvmti.h"
 
-#include "base/logging.h"
 #include "base/macros.h"
 #include "bytecode_utils.h"
 #include "dex_file.h"
diff --git a/test/AllFields/AllFields.java b/test/AllFields/AllFields.java
index d5eac8f..24f8ba1 100644
--- a/test/AllFields/AllFields.java
+++ b/test/AllFields/AllFields.java
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-class AllFields {
+public class AllFields {
     static boolean sZ;
     static byte sB;
     static char sC;
diff --git a/test/AllFields/AllFieldsSub.java b/test/AllFields/AllFieldsSub.java
new file mode 100644
index 0000000..d5f933f
--- /dev/null
+++ b/test/AllFields/AllFieldsSub.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class AllFieldsSub extends AllFields { }
diff --git a/test/AllFields/AllFieldsUnrelated.java b/test/AllFields/AllFieldsUnrelated.java
new file mode 100644
index 0000000..4db66b1
--- /dev/null
+++ b/test/AllFields/AllFieldsUnrelated.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class AllFieldsUnrelated { }
diff --git a/test/Android.bp b/test/Android.bp
index 8f29251..01e424d 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -364,8 +364,9 @@
         "141-class-unload/jni_unload.cc",
         "148-multithread-gc-annotations/gc_coverage.cc",
         "149-suspend-all-stress/suspend_all.cc",
-        "203-multi-checkpoint/multi_checkpoint.cc",
         "154-gc-loop/heap_interface.cc",
+        "167-visit-locks/visit_locks.cc",
+        "203-multi-checkpoint/multi_checkpoint.cc",
         "454-get-vreg/get_vreg_jni.cc",
         "457-regs/regs_jni.cc",
         "461-get-reference-vreg/get_reference_vreg_jni.cc",
@@ -384,6 +385,7 @@
         "656-annotation-lookup-generic-jni/test.cc",
         "661-oat-writer-layout/oat_writer_layout.cc",
         "664-aget-verifier/aget-verifier.cc",
+        "667-jit-jni-stub/jit_jni_stub_test.cc",
         "708-jit-cache-churn/jit.cc",
     ],
     shared_libs: [
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index df497c1..1eed80e 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -16,9 +16,11 @@
 
 #include "jni.h"
 
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+
 #include "art_method-inl.h"
 #include "base/enums.h"
-#include "base/logging.h"
 #include "dex_file-inl.h"
 #include "instrumentation.h"
 #include "jit/jit.h"
@@ -152,10 +154,10 @@
   return method->GetOatMethodQuickCode(kRuntimePointerSize) != nullptr;
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_isJitCompiled(JNIEnv* env,
-                                                              jclass,
-                                                              jclass cls,
-                                                              jstring method_name) {
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasJitCompiledEntrypoint(JNIEnv* env,
+                                                                         jclass,
+                                                                         jclass cls,
+                                                                         jstring method_name) {
   jit::Jit* jit = GetJitIfEnabled();
   if (jit == nullptr) {
     return false;
@@ -169,6 +171,23 @@
   return jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode());
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasJitCompiledCode(JNIEnv* env,
+                                                                   jclass,
+                                                                   jclass cls,
+                                                                   jstring method_name) {
+  jit::Jit* jit = GetJitIfEnabled();
+  if (jit == nullptr) {
+    return false;
+  }
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ArtMethod* method = soa.Decode<mirror::Class>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  return jit->GetCodeCache()->ContainsMethod(method);
+}
+
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
                                                              jclass,
                                                              jclass cls,
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index 80a2780..046b1fb 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -16,7 +16,9 @@
 
 #include "jni.h"
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
+#include "base/mutex.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 31f43fc..4844d1e 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -184,6 +184,10 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
+    elif [ "x$1" = "x--compact-dex-level" ]; then
+        shift
+        COMPILE_FLAGS="${COMPILE_FLAGS} --compact-dex-level=$1"
+        shift
     elif [ "x$1" = "x--jvmti-redefine-stress" ]; then
         # APP_IMAGE doesn't really work with jvmti redefine stress
         USE_JVMTI="y"
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 5b2ebf5..ae1830a 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -237,7 +237,8 @@
         "tests": ["604-hot-static-interface",
                   "612-jit-dex-cache",
                   "613-inlining-dex-cache",
-                  "626-set-resolved-string"],
+                  "626-set-resolved-string",
+                  "638-checker-inline-cache-intrinsic"],
         "variant": "trace  | stream",
         "description": ["These tests expect JIT compilation, which is",
                         "suppressed when tracing."]
@@ -382,7 +383,7 @@
         "tests": ["629-vdex-speed",
                   "634-vdex-duplicate"],
         "description": ["Profile driven dexlayout does not work with vdex or dex verifier."],
-        "variant": "speed-profile"
+        "variant": "speed-profile | cdex-fast"
     },
     {
         "test_patterns": ["616-cha.*"],
@@ -648,5 +649,11 @@
         "tests": "661-oat-writer-layout",
         "variant": "interp-ac | interpreter | jit | no-dex2oat | no-prebuild | no-image | trace",
         "description": ["Test is designed to only check --compiler-filter=speed"]
+    },
+    {
+        "tests": ["628-vdex", "975-iface-private"],
+        "variant": "cdex-fast",
+        "description": ["CompactDex doesn't yet work with input-vdex or 975-iface-private"]
     }
+
 ]
diff --git a/test/run-test b/test/run-test
index fdb2ee4..75fe15c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -225,6 +225,11 @@
         run_args="${run_args} --prebuild"
         prebuild_mode="yes"
         shift;
+    elif [ "x$1" = "x--compact-dex-level" ]; then
+        option="$1"
+        shift
+        run_args="${run_args} $option $1"
+        shift;
     elif [ "x$1" = "x--strip-dex" ]; then
         run_args="${run_args} --strip-dex"
         shift;
@@ -660,6 +665,7 @@
         echo "    -Xcompiler-option     Pass an option to the compiler."
         echo "    --build-option        Pass an option to the build script."
         echo "    --runtime-option      Pass an option to the runtime."
+        echo "    --compact-dex-level   Specify a compact dex level to the compiler."
         echo "    --debug               Wait for the default debugger to attach."
         echo "    --debug-agent <agent-path>"
         echo "                          Wait for the given debugger agent to attach. Currently"
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index e750382..9399857 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -141,6 +141,7 @@
   VARIANT_TYPE_DICT['debuggable'] = {'ndebuggable', 'debuggable'}
   VARIANT_TYPE_DICT['gc'] = {'gcstress', 'gcverify', 'cms'}
   VARIANT_TYPE_DICT['prebuild'] = {'no-prebuild', 'no-dex2oat', 'prebuild'}
+  VARIANT_TYPE_DICT['cdex_level'] = {'cdex-none', 'cdex-fast'}
   VARIANT_TYPE_DICT['relocate'] = {'relocate-npatchoat', 'relocate', 'no-relocate'}
   VARIANT_TYPE_DICT['jni'] = {'jni', 'forcecopy', 'checkjni'}
   VARIANT_TYPE_DICT['address_sizes'] = {'64', '32'}
@@ -183,6 +184,9 @@
   if not _user_input_variants['prebuild']: # Default
     _user_input_variants['prebuild'].add('prebuild')
 
+  if not _user_input_variants['cdex_level']: # Default
+    _user_input_variants['cdex_level'].add('cdex-none')
+
   # By default only run without jvmti
   if not _user_input_variants['jvmti']:
     _user_input_variants['jvmti'].add('no-jvmti')
@@ -339,10 +343,11 @@
                              _user_input_variants['relocate'], _user_input_variants['trace'],
                              _user_input_variants['gc'], _user_input_variants['jni'],
                              _user_input_variants['image'], _user_input_variants['pictest'],
-                             _user_input_variants['debuggable'], _user_input_variants['jvmti'])
+                             _user_input_variants['debuggable'], _user_input_variants['jvmti'],
+                             _user_input_variants['cdex_level'])
 
   for test, target, run, prebuild, compiler, relocate, trace, gc, \
-      jni, image, pictest, debuggable, jvmti in config:
+      jni, image, pictest, debuggable, jvmti, cdex_level in config:
     for address_size in _user_input_variants['address_sizes_target'][target]:
       if stop_testrunner:
         # When ART_TEST_KEEP_GOING is set to false, then as soon as a test
@@ -356,6 +361,7 @@
       test_name += target + '-run-test-'
       test_name += run + '-'
       test_name += prebuild + '-'
+      test_name += cdex_level + '-'
       test_name += compiler + '-'
       test_name += relocate + '-'
       test_name += trace + '-'
@@ -369,7 +375,7 @@
       test_name += address_size
 
       variant_set = {target, run, prebuild, compiler, relocate, trace, gc, jni,
-                     image, pictest, debuggable, jvmti, address_size}
+                     image, pictest, debuggable, jvmti, cdex_level, address_size}
 
       options_test = options_all
 
@@ -386,6 +392,9 @@
       elif prebuild == 'no-dex2oat':
         options_test += ' --no-prebuild --no-dex2oat'
 
+      # Add option and remove the cdex- prefix.
+      options_test += ' --compact-dex-level ' + cdex_level.replace('cdex-','')
+
       if compiler == 'optimizing':
         options_test += ' --optimizing'
       elif compiler == 'regalloc_gc':
@@ -806,6 +815,7 @@
   regex += '(' + '|'.join(VARIANT_TYPE_DICT['pictest']) + ')-'
   regex += '(' + '|'.join(VARIANT_TYPE_DICT['debuggable']) + ')-'
   regex += '(' + '|'.join(VARIANT_TYPE_DICT['jvmti']) + ')-'
+  regex += '(' + '|'.join(VARIANT_TYPE_DICT['cdex_level']) + ')-'
   regex += '(' + '|'.join(RUN_TEST_SET) + ')'
   regex += '(' + '|'.join(VARIANT_TYPE_DICT['address_sizes']) + ')$'
   match = re.match(regex, test_name)
@@ -822,8 +832,9 @@
     _user_input_variants['pictest'].add(match.group(10))
     _user_input_variants['debuggable'].add(match.group(11))
     _user_input_variants['jvmti'].add(match.group(12))
-    _user_input_variants['address_sizes'].add(match.group(14))
-    return {match.group(13)}
+    _user_input_variants['cdex_level'].add(match.group(13))
+    _user_input_variants['address_sizes'].add(match.group(15))
+    return {match.group(14)}
   raise ValueError(test_name + " is not a valid test")
 
 
@@ -874,7 +885,7 @@
   global run_all_configs
 
   parser = argparse.ArgumentParser(description="Runs all or a subset of the ART test suite.")
-  parser.add_argument('-t', '--test', dest='test', help='name of the test')
+  parser.add_argument('-t', '--test', action='append', dest='tests', help='name(s) of the test(s)')
   parser.add_argument('-j', type=int, dest='n_thread')
   parser.add_argument('--timeout', default=timeout, type=int, dest='timeout')
   for variant in TOTAL_VARIANTS_SET:
@@ -906,10 +917,12 @@
     options = setup_env_for_build_target(target_config[options['build_target']],
                                          parser, options)
 
-  test = ''
+  tests = None
   env.EXTRA_DISABLED_TESTS.update(set(options['skips']))
-  if options['test']:
-    test = parse_test_name(options['test'])
+  if options['tests']:
+    tests = set()
+    for test_name in options['tests']:
+      tests |= parse_test_name(test_name)
 
   for variant_type in VARIANT_TYPE_DICT:
     for variant in VARIANT_TYPE_DICT[variant_type]:
@@ -935,11 +948,11 @@
   if options['run_all']:
     run_all_configs = True
 
-  return test
+  return tests
 
 def main():
   gather_test_info()
-  user_requested_test = parse_option()
+  user_requested_tests = parse_option()
   setup_test_env()
   if build:
     build_targets = ''
@@ -956,8 +969,8 @@
     build_command += ' dist'
     if subprocess.call(build_command.split()):
       sys.exit(1)
-  if user_requested_test:
-    test_runner_thread = threading.Thread(target=run_tests, args=(user_requested_test,))
+  if user_requested_tests:
+    test_runner_thread = threading.Thread(target=run_tests, args=(user_requested_tests,))
   else:
     test_runner_thread = threading.Thread(target=run_tests, args=(RUN_TEST_SET,))
   test_runner_thread.daemon = True
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index d85f33a..9a7352e 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -17,7 +17,8 @@
 #include <jni.h>
 #include <stdio.h>
 
-#include "base/logging.h"
+#include <android-base/logging.h>
+
 #include "base/macros.h"
 
 #include "jni_binder.h"
diff --git a/test/ti-agent/trace_helper.cc b/test/ti-agent/trace_helper.cc
index 8b74c7c..bbc7754 100644
--- a/test/ti-agent/trace_helper.cc
+++ b/test/ti-agent/trace_helper.cc
@@ -39,6 +39,18 @@
   bool in_callback;
   bool access_watch_on_load;
   bool modify_watch_on_load;
+  jrawMonitorID trace_mon;
+
+  jclass GetTestClass(jvmtiEnv* jvmti, JNIEnv* env) {
+    if (JvmtiErrorToException(env, jvmti, jvmti->RawMonitorEnter(trace_mon))) {
+      return nullptr;
+    }
+    jclass out = reinterpret_cast<jclass>(env->NewLocalRef(test_klass));
+    if (JvmtiErrorToException(env, jvmti, jvmti->RawMonitorExit(trace_mon))) {
+      return nullptr;
+    }
+    return out;
+  }
 };
 
 static void threadStartCB(jvmtiEnv* jvmti,
@@ -49,8 +61,12 @@
                             jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->thread_start != nullptr);
-  jnienv->CallStaticVoidMethod(data->test_klass, data->thread_start, thread);
+  jnienv->CallStaticVoidMethod(klass.get(), data->thread_start, thread);
 }
 static void threadEndCB(jvmtiEnv* jvmti,
                           JNIEnv* jnienv,
@@ -60,8 +76,12 @@
                             jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->thread_end != nullptr);
-  jnienv->CallStaticVoidMethod(data->test_klass, data->thread_end, thread);
+  jnienv->CallStaticVoidMethod(klass.get(), data->thread_end, thread);
 }
 
 static void singleStepCB(jvmtiEnv* jvmti,
@@ -77,10 +97,14 @@
   if (data->in_callback) {
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->single_step != nullptr);
   data->in_callback = true;
   jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
-  jnienv->CallStaticVoidMethod(data->test_klass,
+  jnienv->CallStaticVoidMethod(klass.get(),
                                data->single_step,
                                thread,
                                method_arg,
@@ -106,11 +130,15 @@
     // Don't do callback for either of these to prevent an infinite loop.
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->field_access != nullptr);
   data->in_callback = true;
   jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
   jobject field_arg = GetJavaField(jvmti, jnienv, field_klass, field);
-  jnienv->CallStaticVoidMethod(data->test_klass,
+  jnienv->CallStaticVoidMethod(klass.get(),
                                data->field_access,
                                method_arg,
                                static_cast<jlong>(location),
@@ -141,6 +169,10 @@
     // Don't do callback recursively to prevent an infinite loop.
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->field_modify != nullptr);
   data->in_callback = true;
   jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
@@ -152,7 +184,7 @@
     jnienv->DeleteLocalRef(field_arg);
     return;
   }
-  jnienv->CallStaticVoidMethod(data->test_klass,
+  jnienv->CallStaticVoidMethod(klass.get(),
                                data->field_modify,
                                method_arg,
                                static_cast<jlong>(location),
@@ -180,6 +212,10 @@
     // Don't do callback for either of these to prevent an infinite loop.
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   CHECK(data->exit_method != nullptr);
   data->in_callback = true;
   jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
@@ -189,7 +225,7 @@
     data->in_callback = false;
     return;
   }
-  jnienv->CallStaticVoidMethod(data->test_klass,
+  jnienv->CallStaticVoidMethod(klass.get(),
                                data->exit_method,
                                method_arg,
                                was_popped_by_exception,
@@ -212,12 +248,16 @@
     // Don't do callback for either of these to prevent an infinite loop.
     return;
   }
+  ScopedLocalRef<jclass> klass(jnienv, data->GetTestClass(jvmti, jnienv));
+  if (klass.get() == nullptr) {
+    return;
+  }
   data->in_callback = true;
   jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
   if (jnienv->ExceptionCheck()) {
     return;
   }
-  jnienv->CallStaticVoidMethod(data->test_klass, data->enter_method, method_arg);
+  jnienv->CallStaticVoidMethod(klass.get(), data->enter_method, method_arg);
   jnienv->DeleteLocalRef(method_arg);
   data->in_callback = false;
 }
@@ -407,6 +447,10 @@
     return;
   }
   memset(data, 0, sizeof(TraceData));
+  if (JvmtiErrorToException(env, jvmti_env,
+                            jvmti_env->CreateRawMonitor("Trace monitor", &data->trace_mon))) {
+    return;
+  }
   data->test_klass = reinterpret_cast<jclass>(env->NewGlobalRef(klass));
   data->enter_method = enter != nullptr ? env->FromReflectedMethod(enter) : nullptr;
   data->exit_method = exit != nullptr ? env->FromReflectedMethod(exit) : nullptr;
@@ -537,42 +581,63 @@
   if (data == nullptr || data->test_klass == nullptr) {
     return;
   }
-  env->DeleteGlobalRef(data->test_klass);
-  if (env->ExceptionCheck()) {
-    return;
-  }
-  // Clear test_klass so we know this isn't being used
-  data->test_klass = nullptr;
+  ScopedLocalRef<jthrowable> err(env, nullptr);
+  // First disable all the events.
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_FIELD_ACCESS,
                                                                 thr))) {
-    return;
+    env->ExceptionDescribe();
+    err.reset(env->ExceptionOccurred());
+    env->ExceptionClear();
   }
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_FIELD_MODIFICATION,
                                                                 thr))) {
-    return;
+    env->ExceptionDescribe();
+    err.reset(env->ExceptionOccurred());
+    env->ExceptionClear();
   }
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_METHOD_ENTRY,
                                                                 thr))) {
-    return;
+    env->ExceptionDescribe();
+    err.reset(env->ExceptionOccurred());
+    env->ExceptionClear();
   }
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_METHOD_EXIT,
                                                                 thr))) {
-    return;
+    env->ExceptionDescribe();
+    err.reset(env->ExceptionOccurred());
+    env->ExceptionClear();
   }
   if (JvmtiErrorToException(env, jvmti_env,
                             jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                                 JVMTI_EVENT_SINGLE_STEP,
                                                                 thr))) {
+    env->ExceptionDescribe();
+    err.reset(env->ExceptionOccurred());
+    env->ExceptionClear();
+  }
+  if (JvmtiErrorToException(env, jvmti_env,
+                            jvmti_env->RawMonitorEnter(data->trace_mon))) {
     return;
   }
+  // Clear test_klass so we know this isn't being used
+  env->DeleteGlobalRef(data->test_klass);
+  data->test_klass = nullptr;
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->RawMonitorExit(data->trace_mon))) {
+    return;
+  }
+  if (err.get() != nullptr) {
+    env->Throw(err.get());
+  }
 }
 
 }  // namespace common_trace
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index a9a0492..34e6a9c 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -23,6 +23,7 @@
 LOCAL_SRC_FILES := $(call all-java-files-under, src/main)
 LOCAL_JAR_MANIFEST := etc/ahat.mf
 LOCAL_JAVA_RESOURCE_FILES := $(LOCAL_PATH)/etc/style.css
+LOCAL_JAVACFLAGS := -Xdoclint:all/protected
 LOCAL_IS_HOST_MODULE := true
 LOCAL_MODULE_TAGS := optional
 LOCAL_MODULE := ahat
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index a765b17..cdfeba4 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -30,7 +30,6 @@
  * Show somewhere where to send bugs.
  * Include a link to /objects in the overview and menu?
  * Turn on LOCAL_JAVACFLAGS := -Xlint:unchecked -Werror
- * Use hex for object ids in URLs?
 
  * [low priority] by site allocations won't line up if the stack has been
    truncated. Is there any way to manually line them up in that case?
@@ -54,7 +53,15 @@
  * Request to be able to sort tables by size.
 
 Release History:
- 1.5 Pending
+ 1.6 Pending
+
+ 1.5 December 05, 2017
+   Distinguish between weakly reachable and unreachable instances.
+   Allow hex ids to be used for objects in query parameters.
+   Restore old presentation of sample paths from gc roots.
+   Fix bug in selection of sample paths from gc root.
+   Fix bug in proguard deobfuscation of stack frames.
+   Tighten up and document ahat public API.
 
  1.4 October 03, 2017
    Give better error messages on failure to launch ahat.
diff --git a/tools/ahat/etc/ahat.mf b/tools/ahat/etc/ahat.mf
index 1753406..df96483 100644
--- a/tools/ahat/etc/ahat.mf
+++ b/tools/ahat/etc/ahat.mf
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 1.4
+Implementation-Version: 1.5
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/etc/ahat_api.txt b/tools/ahat/etc/ahat_api.txt
index 7920ada..93fe46b 100644
--- a/tools/ahat/etc/ahat_api.txt
+++ b/tools/ahat/etc/ahat_api.txt
@@ -9,7 +9,6 @@
 package com.android.ahat.dominators {
 
   public class DominatorsComputation {
-    ctor public DominatorsComputation();
     method public static void computeDominators(com.android.ahat.dominators.DominatorsComputation.Node);
   }
 
@@ -109,7 +108,6 @@
   }
 
   public class Diff {
-    ctor public Diff();
     method public static void snapshots(com.android.ahat.heapdump.AhatSnapshot, com.android.ahat.heapdump.AhatSnapshot);
   }
 
@@ -159,7 +157,6 @@
   }
 
   public class Parser {
-    ctor public Parser();
     method public static com.android.ahat.heapdump.AhatSnapshot parseHeapDump(java.io.File, com.android.ahat.proguard.ProguardMap) throws com.android.ahat.heapdump.HprofFormatException, java.io.IOException;
     method public static com.android.ahat.heapdump.AhatSnapshot parseHeapDump(java.nio.ByteBuffer, com.android.ahat.proguard.ProguardMap) throws com.android.ahat.heapdump.HprofFormatException, java.io.IOException;
   }
@@ -210,11 +207,9 @@
   }
 
   public static class Site.ObjectsInfo implements com.android.ahat.heapdump.Diffable {
-    ctor public Site.ObjectsInfo(com.android.ahat.heapdump.AhatHeap, com.android.ahat.heapdump.AhatClassObj);
     method public com.android.ahat.heapdump.Site.ObjectsInfo getBaseline();
     method public java.lang.String getClassName();
     method public boolean isPlaceHolder();
-    method public void setBaseline(com.android.ahat.heapdump.Site.ObjectsInfo);
     field public com.android.ahat.heapdump.AhatClassObj classObj;
     field public com.android.ahat.heapdump.AhatHeap heap;
     field public com.android.ahat.heapdump.Size numBytes;
@@ -236,6 +231,7 @@
     ctor public Sort();
     method public static java.util.Comparator<com.android.ahat.heapdump.AhatInstance> defaultInstanceCompare(com.android.ahat.heapdump.AhatSnapshot);
     method public static java.util.Comparator<com.android.ahat.heapdump.Site> defaultSiteCompare(com.android.ahat.heapdump.AhatSnapshot);
+    method public static <T> java.util.Comparator<T> withPriority(java.util.Comparator<T>...);
     field public static final java.util.Comparator<com.android.ahat.heapdump.FieldValue> FIELD_VALUE_BY_NAME;
     field public static final java.util.Comparator<com.android.ahat.heapdump.FieldValue> FIELD_VALUE_BY_TYPE;
     field public static final java.util.Comparator<com.android.ahat.heapdump.AhatInstance> INSTANCE_BY_TOTAL_RETAINED_SIZE;
@@ -246,22 +242,6 @@
     field public static final java.util.Comparator<com.android.ahat.heapdump.Size> SIZE_BY_SIZE;
   }
 
-  public static class Sort.InstanceByHeapRetainedSize implements java.util.Comparator {
-    ctor public Sort.InstanceByHeapRetainedSize(com.android.ahat.heapdump.AhatHeap);
-    method public int compare(com.android.ahat.heapdump.AhatInstance, com.android.ahat.heapdump.AhatInstance);
-  }
-
-  public static class Sort.SiteByHeapSize implements java.util.Comparator {
-    ctor public Sort.SiteByHeapSize(com.android.ahat.heapdump.AhatHeap);
-    method public int compare(com.android.ahat.heapdump.Site, com.android.ahat.heapdump.Site);
-  }
-
-  public static class Sort.WithPriority<T> implements java.util.Comparator {
-    ctor public Sort.WithPriority(java.util.Comparator<T>...);
-    ctor public Sort.WithPriority(java.util.List<java.util.Comparator<T>>);
-    method public int compare(T, T);
-  }
-
   public final class Type extends java.lang.Enum {
     method public static com.android.ahat.heapdump.Type valueOf(java.lang.String);
     method public static final com.android.ahat.heapdump.Type[] values();
@@ -285,7 +265,6 @@
     method public java.lang.Integer asInteger();
     method public java.lang.Long asLong();
     method public abstract boolean equals(java.lang.Object);
-    method public com.android.ahat.heapdump.Value getBaseline();
     method public static com.android.ahat.heapdump.Value getBaseline(com.android.ahat.heapdump.Value);
     method public static com.android.ahat.heapdump.Type getType(com.android.ahat.heapdump.Value);
     method public boolean isAhatInstance();
diff --git a/tools/ahat/src/main/com/android/ahat/Main.java b/tools/ahat/src/main/com/android/ahat/Main.java
index 048573e..04a6012 100644
--- a/tools/ahat/src/main/com/android/ahat/Main.java
+++ b/tools/ahat/src/main/com/android/ahat/Main.java
@@ -30,6 +30,9 @@
 import java.text.ParseException;
 import java.util.concurrent.Executors;
 
+/**
+ * Contains the main entry point for the ahat heap dump viewer.
+ */
 public class Main {
   private Main() {
   }
@@ -70,6 +73,14 @@
     throw new AssertionError("Unreachable");
   }
 
+  /**
+   * Main entry for ahat heap dump viewer.
+   * Launches an http server on localhost for viewing a given heap dump.
+   * See the ahat README or pass "--help" as one of the arguments to see a
+   * description of what arguments and options are expected.
+   *
+   * @param args the command line arguments
+   */
   public static void main(String[] args) {
     int port = 7100;
     for (String arg : args) {
diff --git a/tools/ahat/src/main/com/android/ahat/SiteHandler.java b/tools/ahat/src/main/com/android/ahat/SiteHandler.java
index 543eaa3..5093f0d 100644
--- a/tools/ahat/src/main/com/android/ahat/SiteHandler.java
+++ b/tools/ahat/src/main/com/android/ahat/SiteHandler.java
@@ -88,7 +88,7 @@
         new Column("Class"));
 
     List<Site.ObjectsInfo> infos = site.getObjectsInfos();
-    Comparator<Site.ObjectsInfo> compare = new Sort.WithPriority<Site.ObjectsInfo>(
+    Comparator<Site.ObjectsInfo> compare = Sort.withPriority(
         Sort.OBJECTS_INFO_BY_HEAP_NAME,
         Sort.OBJECTS_INFO_BY_SIZE,
         Sort.OBJECTS_INFO_BY_CLASS_NAME);
diff --git a/tools/ahat/src/main/com/android/ahat/dominators/DominatorsComputation.java b/tools/ahat/src/main/com/android/ahat/dominators/DominatorsComputation.java
index 58b7b59..d3fea48 100644
--- a/tools/ahat/src/main/com/android/ahat/dominators/DominatorsComputation.java
+++ b/tools/ahat/src/main/com/android/ahat/dominators/DominatorsComputation.java
@@ -23,38 +23,72 @@
 import java.util.Queue;
 
 /**
- * Generic DominatorsComputation.
- *
- * To use the dominators computation, have your graph nodes implement the
- * DominatorsComputation.Node interface, then call
- * DominatorsComputation.computeDominators on the single root node.
+ * Provides a static method for computing the immediate dominators of a
+ * directed graph. It can be used with any directed graph data structure
+ * that implements the {@link DominatorsComputation.Node} interface and has
+ * some root node with no incoming edges.
  */
 public class DominatorsComputation {
+  private DominatorsComputation() {
+  }
+
   /**
-   * Interface for a directed graph to perform the dominators computation on.
+   * Interface for a directed graph to perform immediate dominators
+   * computation on.
+   * The dominators computation can be used with directed graph data
+   * structures that implement this <code>Node</code> interface. To use the
+   * dominators computation on your graph, you must make the following
+   * functionality available to the dominators computation:
+   * <ul>
+   * <li>Efficiently mapping from node to associated internal dominators
+   *     computation state using the
+   *     {@link #setDominatorsComputationState setDominatorsComputationState} and
+   *     {@link #getDominatorsComputationState getDominatorsComputationState} methods.
+   * <li>Iterating over all outgoing edges of a node using the
+   *     {@link #getReferencesForDominators getReferencesForDominators} method.
+   * <li>Setting the computed dominator for a node using the
+   *     {@link #setDominator setDominator} method.
+   * </ul>
    */
   public interface Node {
     /**
-     * Associate the given dominator state with this node.
+     * Associates the given dominator state with this node. Subsequent calls to
+     * {@link #getDominatorsComputationState getDominatorsComputationState} on
+     * this node should return the state given here. At the conclusion of the
+     * dominators computation, this method will be called for
+     * each node with <code>state</code> set to null.
+     *
+     * @param state the dominator state to associate with this node
      */
     void setDominatorsComputationState(Object state);
 
     /**
-     * Get the most recent dominator state associated with this node using
-     * setDominatorsComputationState. If setDominatorsComputationState has not
-     * yet been called, this should return null.
+     * Returns the dominator state most recently associated with this node
+     * by a call to {@link #setDominatorsComputationState setDominatorsComputationState}.
+     * If <code>setDominatorsComputationState</code> has not yet been called
+     * on this node for this dominators computation, this method should return
+     * null.
+     *
+     * @return the associated dominator state
      */
     Object getDominatorsComputationState();
 
     /**
-     * Return a collection of nodes referenced from this node, for the
-     * purposes of computing dominators.
+     * Returns a collection of nodes referenced from this node, for the
+     * purposes of computing dominators. This method will be called at most
+     * once for each node reachable from the root node of the dominators
+     * computation.
+     *
+     * @return an iterable collection of the nodes with an incoming edge from
+     *         this node.
      */
     Iterable<? extends Node> getReferencesForDominators();
 
     /**
-     * Update this node's dominator based on the results of the dominators
+     * Sets the dominator for this node based on the results of the dominators
      * computation.
+     *
+     * @param dominator the computed immediate dominator of this node
      */
     void setDominator(Node dominator);
   }
@@ -112,8 +146,14 @@
   }
 
   /**
-   * Compute the dominator tree rooted at the given node.
-   * There must not be any incoming references to the root node.
+   * Computes the immediate dominators of all nodes reachable from the <code>root</code> node.
+   * There must not be any incoming references to the <code>root</code> node.
+   * <p>
+   * The result of this function is to call the {@link Node#setDominator}
+   * function on every node reachable from the root node.
+   *
+   * @param root the root node of the dominators computation
+   * @see Node
    */
   public static void computeDominators(Node root) {
     long id = 0;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatArrayInstance.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatArrayInstance.java
index ccdd6e4..9c80802 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatArrayInstance.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatArrayInstance.java
@@ -21,6 +21,12 @@
 import java.util.Collections;
 import java.util.List;
 
+/**
+ * An array instance from a parsed heap dump.
+ * It is used for both object and primitive arrays. The class provides methods
+ * for accessing the length and elements of the array in addition to those
+ * methods inherited from {@link AhatInstance}.
+ */
 public class AhatArrayInstance extends AhatInstance {
   // To save space, we store arrays as primitive arrays or AhatInstance arrays
   // and provide a wrapper over the arrays to expose a list of Values.
@@ -186,21 +192,30 @@
   }
 
   /**
-   * Returns the length of the array.
+   * Returns the number of elements in the array.
+   *
+   * @return number of elements in the array.
    */
   public int getLength() {
     return mValues.size();
   }
 
   /**
-   * Returns the array's values.
+   * Returns a list of all of the array's elements in order.
+   * The returned list does not support modification.
+   *
+   * @return list of the array's elements.
    */
   public List<Value> getValues() {
     return mValues;
   }
 
   /**
-   * Returns the object at the given index of this array.
+   * Returns the value at the given index of this array.
+   *
+   * @param index the index of the value to retrieve
+   * @return the value at the given index
+   * @throws IndexOutOfBoundsException if the index is out of range
    */
   public Value getValue(int index) {
     return mValues.get(index);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassInstance.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassInstance.java
index cb9d959..c82ef20 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassInstance.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassInstance.java
@@ -20,6 +20,15 @@
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
+/**
+ * A typical Java object from a parsed heap dump.
+ * Note that this is used for Java objects that are instances of classes (as
+ * opposed to arrays), not for class objects themselves.
+ * See {@link AhatClassObj} for the representation of class objects.
+ * <p>
+ * This class provides a method for iterating over the instance fields of the
+ * object in addition to those methods inherited from {@link AhatInstance}.
+ */
 public class AhatClassInstance extends AhatInstance {
   // Instance fields of the object. These are stored in order of the instance
   // field descriptors from the class object, starting with this class first,
@@ -84,6 +93,10 @@
 
   /**
    * Returns the list of class instance fields for this instance.
+   * Includes values of fields inherited from the superclass of this instance.
+   * The fields are returned in no particular order.
+   *
+   * @return Iterable over the instance field values.
    */
   public Iterable<FieldValue> getInstanceFields() {
     return new InstanceFieldIterator(mFields, getClassObj());
@@ -220,7 +233,7 @@
 
   }
 
-  public BufferedImage asBitmap() {
+  @Override public BufferedImage asBitmap() {
     BitmapInfo info = getBitmapInfo();
     if (info == null) {
       return null;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassObj.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassObj.java
index 3babf76..36ada28 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassObj.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatClassObj.java
@@ -20,6 +20,13 @@
 import java.util.Arrays;
 import java.util.List;
 
+/**
+ * A class from a parsed heap dump.
+ * In addition to those methods inherited from {@link AhatInstance}, the class
+ * provides methods for accessing information about the class object, such as
+ * the class loader, superclass, static field values and instance field
+ * descriptors.
+ */
 public class AhatClassObj extends AhatInstance {
   private String mClassName;
   private AhatClassObj mSuperClassObj;
@@ -56,6 +63,9 @@
 
   /**
    * Returns the name of the class this is a class object for.
+   * For example, "java.lang.String".
+   *
+   * @return the name of the class
    */
   public String getName() {
     return mClassName;
@@ -63,6 +73,8 @@
 
   /**
    * Returns the superclass of this class object.
+   *
+   * @return the superclass object
    */
   public AhatClassObj getSuperClassObj() {
     return mSuperClassObj;
@@ -70,14 +82,18 @@
 
   /**
    * Returns the class loader of this class object.
+   *
+   * @return the class loader object
    */
   public AhatInstance getClassLoader() {
     return mClassLoader;
   }
 
   /**
-   * Returns the size of instances of this object, as reported in the heap
-   * dump.
+   * Returns the size of instances of this object.
+   * The size returned is as reported in the heap dump.
+   *
+   * @return the class instance size
    */
   public long getInstanceSize() {
     return mInstanceSize;
@@ -85,6 +101,8 @@
 
   /**
    * Returns the static field values for this class object.
+   *
+   * @return the static field values
    */
   public List<FieldValue> getStaticFieldValues() {
     return Arrays.asList(mStaticFieldValues);
@@ -92,6 +110,9 @@
 
   /**
    * Returns the fields of instances of this class.
+   * Does not include fields from the super class of this class.
+   *
+   * @return the instance fields
    */
   public Field[] getInstanceFields() {
     return mInstanceFields;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatHeap.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatHeap.java
index b8897a1..60c9a0d 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatHeap.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatHeap.java
@@ -16,6 +16,13 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * Used to identify and access basic information about a particular
+ * heap from the heap dump. Standard Java heap dumps have a single heap,
+ * called the "default" heap. Android heap dumps distinguish among "zygote",
+ * "image", and "app" heaps. There will be a single instance of AhatHeap for
+ * each different heap in the heap dump.
+ */
 public class AhatHeap implements Diffable<AhatHeap> {
   private String mName;
   private Size mSize = Size.ZERO;
@@ -61,6 +68,9 @@
 
   /**
    * Returns the name of this heap.
+   * For example, "default", "app", "image", or "zygote".
+   *
+   * @return The name of the heap.
    */
   public String getName() {
     return mName;
@@ -68,6 +78,8 @@
 
   /**
    * Returns the total number of bytes allocated on this heap.
+   *
+   * @return the total number of bytes allocated on this heap.
    */
   public Size getSize() {
     return mSize;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java
index a9f819f..67253bf 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatInstance.java
@@ -26,6 +26,11 @@
 import java.util.List;
 import java.util.Queue;
 
+/**
+ * A Java instance from a parsed heap dump. It is the base class used for all
+ * kinds of Java instances, including normal Java objects, class objects, and
+ * arrays.
+ */
 public abstract class AhatInstance implements Diffable<AhatInstance>,
                                               DominatorsComputation.Node {
   // The id of this instance from the heap dump.
@@ -80,14 +85,20 @@
   }
 
   /**
-   * Returns a unique identifier for the instance.
+   * Returns a unique identifier for this instance.
+   *
+   * @return id of the instance
    */
   public long getId() {
     return mId;
   }
 
   /**
-   * Returns the shallow number of bytes this object takes up.
+   * Returns the number of bytes used for this object in the heap.
+   * The returned size is a shallow size for the object that does not include
+   * sizes of other objects dominated by this object.
+   *
+   * @return the shallow size of the object
    */
   public Size getSize() {
     return new Size(mClassObj.getInstanceSize() + getExtraJavaSize(), mRegisteredNativeSize);
@@ -104,8 +115,13 @@
   abstract long getExtraJavaSize();
 
   /**
-   * Returns the number of bytes belonging to the given heap that this instance
-   * retains.
+   * Returns the number of bytes retained by this object in the given heap.
+   * The returned size includes the shallow size of this object and the size
+   * of all objects directly or indirectly retained by this object. Only those
+   * objects allocated on the given heap are included in the reported size.
+   *
+   * @param heap the heap to get the retained size for
+   * @return the retained size of the object
    */
   public Size getRetainedSize(AhatHeap heap) {
     int index = heap.getIndex();
@@ -116,7 +132,11 @@
   }
 
   /**
-   * Returns the total number of bytes this instance retains.
+   * Returns the total number of bytes retained by this object. The returned
+   * size includes the shallow size of this object and the size of all objects
+   * directly or indirectly retained by this object.
+   *
+   * @return the total retained size of the object
    */
   public Size getTotalRetainedSize() {
     Size size = Size.ZERO;
@@ -136,7 +156,11 @@
   }
 
   /**
-   * Returns true if this object is strongly-reachable.
+   * Returns true if this object is strongly reachable. An object is strongly
+   * reachable if there exists a path of (strong) references from some root
+   * object to this object.
+   *
+   * @return true if the object is strongly reachable
    */
   public boolean isStronglyReachable() {
     return mImmediateDominator != null;
@@ -144,14 +168,28 @@
 
   /**
    * Returns true if this object is reachable only through a
-   * soft/weak/phantom/finalizer reference.
+   * soft/weak/phantom/finalizer reference. An object is weakly reachable if
+   * it is not strongly reachable but there still exists a path of references
+   * from some root object to this object.  Because the object is not strongly
+   * reachable, any such path must contain a SoftReference, WeakReference,
+   * PhantomReference, or FinalizerReference somewhere along it.
+   * <p>
+   * Unlike a strongly reachable object, a weakly reachable object is allowed
+   * to be garbage collected.
+   *
+   * @return true if the object is weakly reachable
    */
   public boolean isWeaklyReachable() {
     return !isStronglyReachable() && mNextInstanceToGcRoot != null;
   }
 
   /**
-   * Returns true if this object is completely unreachable.
+   * Returns true if this object is completely unreachable. An object is
+   * completely unreachable if there is no path to the object from some root
+   * object, neither through strong nor soft/weak/phantom/finalizer
+   * references.
+   *
+   * @return true if the object is completely unreachable
    */
   public boolean isUnreachable() {
     return !isStronglyReachable() && !isWeaklyReachable();
@@ -159,6 +197,8 @@
 
   /**
    * Returns the heap that this instance is allocated on.
+   *
+   * @return heap the instance is allocated on
    */
   public AhatHeap getHeap() {
     return mHeap;
@@ -171,7 +211,10 @@
   abstract Iterable<Reference> getReferences();
 
   /**
-   * Returns true if this instance is marked as a root instance.
+   * Returns true if this instance is a GC root.
+   *
+   * @return true if this instance is a GC root.
+   * @see #getRootTypes
    */
   public boolean isRoot() {
     return mRootTypes != 0;
@@ -187,6 +230,8 @@
   /**
    * Returns a list of the root types of this object.
    * Returns null if this object is not a root.
+   *
+   * @return list of the object's root types
    */
   public Collection<RootType> getRootTypes() {
     if (!isRoot()) {
@@ -205,14 +250,17 @@
   /**
    * Returns the immediate dominator of this instance.
    * Returns null if this is a root instance.
+   *
+   * @return the immediate dominator of this instance
    */
   public AhatInstance getImmediateDominator() {
     return mImmediateDominator;
   }
 
   /**
-   * Returns a list of those objects immediately dominated by the given
-   * instance.
+   * Returns a list of objects immediately dominated by this instance.
+   *
+   * @return list of immediately dominated objects
    */
   public List<AhatInstance> getDominated() {
     return mDominated;
@@ -220,13 +268,17 @@
 
   /**
    * Returns the site where this instance was allocated.
+   *
+   * @return the object's allocation site
    */
   public Site getSite() {
     return mSite;
   }
 
   /**
-   * Returns true if the given instance is a class object
+   * Returns true if this instance is a class object.
+   *
+   * @return true if this instance is a class object
    */
   public boolean isClassObj() {
     // Overridden by AhatClassObj.
@@ -236,6 +288,8 @@
   /**
    * Returns this as an AhatClassObj if this is an AhatClassObj.
    * Returns null if this is not an AhatClassObj.
+   *
+   * @return this instance as a class object
    */
   public AhatClassObj asClassObj() {
     // Overridden by AhatClassObj.
@@ -243,7 +297,11 @@
   }
 
   /**
-   * Returns the class object instance for the class of this object.
+   * Returns the class object for this instance.
+   * For example, if this object is an instance of java.lang.String, this
+   * method returns the AhatClassObj for java.lang.String.
+   *
+   * @return the instance's class object
    */
   public AhatClassObj getClassObj() {
     return mClassObj;
@@ -251,6 +309,10 @@
 
   /**
    * Returns the name of the class this object belongs to.
+   * For example, if this object is an instance of java.lang.String, returns
+   * "java.lang.String".
+   *
+   * @return the name of this instance's class
    */
   public String getClassName() {
     AhatClassObj classObj = getClassObj();
@@ -258,7 +320,9 @@
   }
 
   /**
-   * Returns true if the given instance is an array instance
+   * Returns true if this instance is an array instance.
+   *
+   * @return true if this instance is an array instance
    */
   public boolean isArrayInstance() {
     // Overridden by AhatArrayInstance.
@@ -268,6 +332,8 @@
   /**
    * Returns this as an AhatArrayInstance if this is an AhatArrayInstance.
    * Returns null if this is not an AhatArrayInstance.
+   *
+   * @return this instance as an array instance
    */
   public AhatArrayInstance asArrayInstance() {
     // Overridden by AhatArrayInstance.
@@ -275,7 +341,9 @@
   }
 
   /**
-   * Returns true if the given instance is a class instance
+   * Returns true if this instance is a class instance.
+   *
+   * @return true if this instance is a class instance
    */
   public boolean isClassInstance() {
     return false;
@@ -284,15 +352,20 @@
   /**
    * Returns this as an AhatClassInstance if this is an AhatClassInstance.
    * Returns null if this is not an AhatClassInstance.
+   *
+   * @return this instance as a class instance
    */
   public AhatClassInstance asClassInstance() {
     return null;
   }
 
   /**
-   * Return the referent associated with this instance.
-   * This is relevent for instances of java.lang.ref.Reference.
-   * Returns null if the instance has no referent associated with it.
+   * Returns the <code>referent</code> associated with this instance.
+   * This is only relevant for instances of java.lang.ref.Reference or its
+   * subclasses. Returns null if the instance has no referent associated with
+   * it.
+   *
+   * @return the referent associated with this instance
    */
   public AhatInstance getReferent() {
     // Overridden by AhatClassInstance.
@@ -300,7 +373,9 @@
   }
 
   /**
-   * Returns a list of objects with hard references to this object.
+   * Returns a list of objects with (strong) references to this object.
+   *
+   * @return the objects referencing this object
    */
   public List<AhatInstance> getHardReverseReferences() {
     if (mHardReverseReferences != null) {
@@ -310,7 +385,10 @@
   }
 
   /**
-   * Returns a list of objects with soft references to this object.
+   * Returns a list of objects with soft/weak/phantom/finalizer references to
+   * this object.
+   *
+   * @return the objects weakly referencing this object
    */
   public List<AhatInstance> getSoftReverseReferences() {
     if (mSoftReverseReferences != null) {
@@ -320,9 +398,12 @@
   }
 
   /**
-   * Returns the value of a field of an instance.
-   * Returns null if the field value is null, the field couldn't be read, or
-   * there are multiple fields with the same name.
+   * Returns the value of a field of this instance. Returns null if the field
+   * value is null, the field couldn't be read, or there are multiple fields
+   * with the same name.
+   *
+   * @param fieldName the name of the field to get the value of
+   * @return the field value
    */
   public Value getField(String fieldName) {
     // Overridden by AhatClassInstance.
@@ -330,8 +411,13 @@
   }
 
   /**
-   * Reads a reference field of this instance.
-   * Returns null if the field value is null, or if the field couldn't be read.
+   * Reads a reference field of this instance. Returns null if the field value
+   * is null, of primitive type, or if the field couldn't be read. There is no
+   * way using this method to distinguish between a reference field with value
+   * <code>null</code> and an invalid field.
+   *
+   * @param fieldName the name of the reference field to get the value of
+   * @return the reference field value
    */
   public AhatInstance getRefField(String fieldName) {
     // Overridden by AhatClassInstance.
@@ -339,30 +425,41 @@
   }
 
   /**
-   * Assuming inst represents a DexCache object, return the dex location for
-   * that dex cache. Returns null if the given instance doesn't represent a
-   * DexCache object or the location could not be found.
+   * Returns the dex location associated with this object. Only applies to
+   * instances of dalvik.system.DexCache. If this is an instance of DexCache,
+   * returns the dex location for that dex cache. Otherwise returns null.
    * If maxChars is non-negative, the returned location is truncated to
    * maxChars in length.
+   *
+   * @param maxChars the maximum length of the returned string
+   * @return the dex location associated with this object
    */
   public String getDexCacheLocation(int maxChars) {
     return null;
   }
 
   /**
-   * Return the bitmap instance associated with this object, or null if there
-   * is none. This works for android.graphics.Bitmap instances and their
-   * underlying Byte[] instances.
+   * Returns the android.graphics.Bitmap instance associated with this object.
+   * Instances of android.graphics.Bitmap return themselves. If this is a
+   * byte[] array containing pixel data for an instance of
+   * android.graphics.Bitmap, that instance of android.graphics.Bitmap is
+   * returned. Otherwise null is returned.
+   *
+   * @return the bitmap instance associated with this object
    */
   public AhatInstance getAssociatedBitmapInstance() {
     return null;
   }
 
   /**
-   * Read the string value from this instance.
-   * Returns null if this object can't be interpreted as a string.
-   * The returned string is truncated to maxChars characters.
-   * If maxChars is negative, the returned string is not truncated.
+   * Returns the (bounded-length) string associated with this instance.
+   * Applies to instances of java.lang.String, char[], and in some cases
+   * byte[]. Returns null if this object cannot be interpreted as a string.
+   * If maxChars is non-negative, the returned string is truncated to maxChars
+   * characters in length.
+   *
+   * @param maxChars the maximum length of the returned string
+   * @return the string associated with this instance
    */
   public String asString(int maxChars) {
     // By default instances can't be interpreted as a string. This method is
@@ -372,17 +469,23 @@
   }
 
   /**
-   * Reads the string value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a string.
+   * Returns the string associated with this instance. Applies to instances of
+   * java.lang.String, char[], and in some cases byte[]. Returns null if this
+   * object cannot be interpreted as a string.
+   *
+   * @return the string associated with this instance
    */
   public String asString() {
     return asString(-1);
   }
 
   /**
-   * Return the bitmap associated with the given instance, if any.
+   * Returns the bitmap pixel data associated with this instance.
    * This is relevant for instances of android.graphics.Bitmap and byte[].
-   * Returns null if there is no bitmap associated with the given instance.
+   * Returns null if there is no bitmap pixel data associated with this
+   * instance.
+   *
+   * @return the bitmap pixel data associated with this instance
    */
   public BufferedImage asBitmap() {
     return null;
@@ -402,11 +505,23 @@
   }
 
   /**
-   * Returns a sample path from a GC root to this instance.
-   * This instance is included as the last element of the path with an empty
-   * field description.
+   * Returns a sample path from a GC root to this instance. The first element
+   * of the returned path is a GC root object. This instance is included as
+   * the last element of the path with an empty field description.
+   * <p>
+   * If the instance is strongly reachable, a path of strong references will
+   * be returned. If the instance is weakly reachable, the returned path will
+   * include a soft/weak/phantom/finalizer reference somewhere along it.
+   * Returns null if this instance is not reachable.
+   *
+   * @return sample path from a GC root to this instance
+   * @see PathElement
    */
   public List<PathElement> getPathFromGcRoot() {
+    if (isUnreachable()) {
+      return null;
+    }
+
     List<PathElement> path = new ArrayList<PathElement>();
 
     AhatInstance dom = this;
@@ -434,12 +549,15 @@
     return new PathElement(inst.mNextInstanceToGcRoot, inst.mNextInstanceToGcRootField);
   }
 
-  /** Returns a human-readable identifier for this object.
+  /**
+   * Returns a human-readable identifier for this object.
    * For class objects, the string is the class name.
    * For class instances, the string is the class name followed by '@' and the
    * hex id of the instance.
    * For array instances, the string is the array type followed by the size in
    * square brackets, followed by '@' and the hex id of the instance.
+   *
+   * @return human-readable identifier for this object
    */
   @Override public abstract String toString();
 
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java b/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java
index 59ce5d1..535db08 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/AhatSnapshot.java
@@ -19,6 +19,11 @@
 import com.android.ahat.dominators.DominatorsComputation;
 import java.util.List;
 
+/**
+ * A parsed heap dump.
+ * It contains methods to access the heaps, allocation sites, roots, classes,
+ * and instances from the parsed heap dump.
+ */
 public class AhatSnapshot implements Diffable<AhatSnapshot> {
   private final Site mRootSite;
 
@@ -60,16 +65,24 @@
   }
 
   /**
-   * Returns the instance with given id in this snapshot.
+   * Returns the instance with the given id in this snapshot.
+   * Where the id of an instance x is x.getId().
    * Returns null if no instance with the given id is found.
+   *
+   * @param id the id of the instance to find
+   * @return the instance with the given id
    */
   public AhatInstance findInstance(long id) {
     return mInstances.get(id);
   }
 
   /**
-   * Returns the AhatClassObj with given id in this snapshot.
+   * Returns the AhatClassObj with the given id in this snapshot.
+   * Where the id of a class object x is x.getId().
    * Returns null if no class object with the given id is found.
+   *
+   * @param id the id of the class object to find
+   * @return the class object with the given id
    */
   public AhatClassObj findClassObj(long id) {
     AhatInstance inst = findInstance(id);
@@ -77,8 +90,12 @@
   }
 
   /**
-   * Returns the heap with the given name, if any.
+   * Returns the heap with the given name.
+   * Where the name of a heap x is x.getName().
    * Returns null if no heap with the given name could be found.
+   *
+   * @param name the name of the heap to get
+   * @return the heap with the given name
    */
   public AhatHeap getHeap(String name) {
     // We expect a small number of heaps (maybe 3 or 4 total), so a linear
@@ -93,30 +110,45 @@
 
   /**
    * Returns a list of heaps in the snapshot in canonical order.
-   * Modifications to the returned list are visible to this AhatSnapshot,
-   * which is used by diff to insert place holder heaps.
+   * <p>
+   * Note: modifications to the returned list are visible to this
+   * AhatSnapshot, which is used by diff to insert place holder heaps.
+   *
+   * @return list of heaps
    */
   public List<AhatHeap> getHeaps() {
     return mHeaps;
   }
 
   /**
-   * Returns a collection of instances whose immediate dominator is the
-   * SENTINEL_ROOT.
+   * Returns a collection of "rooted" instances.
+   * An instance is "rooted" if it is a GC root, or if it is retained by more
+   * than one GC root. These are reachable instances that are not immediately
+   * dominated by any other instance in the heap.
+   *
+   * @return collection of rooted instances
    */
   public List<AhatInstance> getRooted() {
     return mSuperRoot.getDominated();
   }
 
   /**
-   * Returns the root site for this snapshot.
+   * Returns the root allocation site for this snapshot.
+   *
+   * @return the root allocation site
    */
   public Site getRootSite() {
     return mRootSite;
   }
 
-  // Get the site associated with the given id.
-  // Returns the root site if no such site found.
+  /**
+   * Returns the site associated with the given id.
+   * Where the id of a site x is x.getId().
+   * Returns the root site if no site with the given id is found.
+   *
+   * @param id the id of the site to get
+   * @return the site with the given id
+   */
   public Site getSite(long id) {
     Site site = mRootSite.findSite(id);
     return site == null ? mRootSite : site;
@@ -127,8 +159,10 @@
   }
 
   /**
-   * Returns true if this snapshot has been diffed against another, different
+   * Returns true if this snapshot has been diffed against a different
    * snapshot.
+   *
+   * @return true if the snapshot has been diffed
    */
   public boolean isDiffed() {
     return mBaseline != this;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Diff.java b/tools/ahat/src/main/com/android/ahat/heapdump/Diff.java
index 98c7e58..b35b424 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Diff.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Diff.java
@@ -25,9 +25,15 @@
 import java.util.Map;
 import java.util.Objects;
 
+/**
+ * Provides a static method to diff two heap dumps.
+ */
 public class Diff {
+  private Diff() {
+  }
+
   /**
-   * Perform a diff between two heap lists.
+   * Performs a diff between two heap lists.
    *
    * Heaps are diffed based on heap name. PlaceHolder heaps will be added to
    * the given lists as necessary so that every heap in A has a corresponding
@@ -312,8 +318,16 @@
   }
 
   /**
-   * Perform a diff of the two snapshots, setting each as the baseline for the
-   * other.
+   * Performs a diff of two snapshots.
+   * Each snapshot will be set as the baseline for the other snapshot.
+   * <p>
+   * The diff algorithm attempts to match instances in snapshot <code>a</code>
+   * to corresponding instances in snapshot <code>b</code>. The snapshots need
+   * not come from the same running process, application version, or platform
+   * version.
+   *
+   * @param a one of the snapshots to diff
+   * @param b the other of the snapshots to diff
    */
   public static void snapshots(AhatSnapshot a, AhatSnapshot b) {
     a.setBaseline(b);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/DiffFields.java b/tools/ahat/src/main/com/android/ahat/heapdump/DiffFields.java
index e3c671f..ff07af0 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/DiffFields.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/DiffFields.java
@@ -22,12 +22,16 @@
 import java.util.List;
 
 /**
- * This class contains a routine for diffing two collections of static or
- * instance fields.
+ * Provides a routine for diffing two collections of static or instance
+ * fields.
  */
 public class DiffFields {
   /**
-   * Return the result of diffing two collections of field values.
+   * Returns the result of diffing two collections of field values.
+   *
+   * @param current a list of fields in the current heap dump
+   * @param baseline a list of fields in the baseline heap dump
+   * @return list of diffed fields
    */
   public static List<DiffedFieldValue> diff(Iterable<FieldValue> current,
                                             Iterable<FieldValue> baseline) {
@@ -85,5 +89,5 @@
    * by field name and type.
    */
   private static final Comparator<FieldValue> FOR_DIFF
-    = new Sort.WithPriority(Sort.FIELD_VALUE_BY_NAME, Sort.FIELD_VALUE_BY_TYPE);
+    = Sort.withPriority(Sort.FIELD_VALUE_BY_NAME, Sort.FIELD_VALUE_BY_TYPE);
 }
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Diffable.java b/tools/ahat/src/main/com/android/ahat/heapdump/Diffable.java
index 53442c8..09c8ee6 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Diffable.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Diffable.java
@@ -17,12 +17,19 @@
 package com.android.ahat.heapdump;
 
 /**
- * An interface for objects that have corresponding objects in a baseline heap
- * dump.
+ * An interface for instances/sites/heaps/etc in a heap dump that can be
+ * related to corresponding instances/sites/heaps/etc in a second heap dump
+ * when the two heap dumps have been diffed.
  */
 public interface Diffable<T> {
   /**
-   * Return the baseline object that corresponds to this one.
+   * Returns the object in the other heap dump that corresponds to this object.
+   * When two heap dumps are diffed, diffable objects from the first heap dump
+   * will be matched to "baseline" objects from the second heap dump, and
+   * diffable objects from the second heap dump will be matched to "baseline"
+   * objects from the first heap dump.
+   *
+   * @return the matched object from the other heap dump
    */
   T getBaseline();
 
@@ -32,6 +39,8 @@
    * baseline heap dump that is not in this heap dump. In that case, we create
    * a dummy place holder object in this heap dump as an indicator of the
    * object removed from the baseline heap dump.
+   *
+   * @return true if the object is a placeholder
    */
   boolean isPlaceHolder();
 }
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/DiffedFieldValue.java b/tools/ahat/src/main/com/android/ahat/heapdump/DiffedFieldValue.java
index 3cd273e..8de337e 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/DiffedFieldValue.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/DiffedFieldValue.java
@@ -18,25 +18,65 @@
 
 import java.util.Objects;
 
-/** DiffedFieldValue is used by the DiffedField class to return the result of
- * diffing two collections of fields.
+/**
+ * Used by the DiffedField class to return the result of diffing two
+ * collections of fields.
  */
 public class DiffedFieldValue {
+  /**
+   * The name of the field.
+   */
   public final String name;
+
+  /**
+   * The type of the field.
+   */
   public final Type type;
+
+  /**
+   * The value of the field in the current heap dump.
+   */
   public final Value current;
+
+  /**
+   * The value of the field in the baseline heap dump.
+   */
   public final Value baseline;
 
+  /**
+   * Whether the field was added to, deleted from, or matched with a field in
+   * the baseline heap dump.
+   */
   public final Status status;
 
+  /**
+   * A status enum to indicate whether a field was added to, deleted from, or
+   * matched with a field in the baseline heap dump.
+   */
   public static enum Status {
-    ADDED,      // The current field has no matching baseline value.
-    MATCHED,    // The current field has a matching baseline value.
-    DELETED     // The baseline field has no matching current value.
+    /**
+     * The field exists in the current heap dump but not the baseline.
+     */
+    ADDED,
+
+    /**
+     * The field exists in both the current and baseline heap dumps.
+     */
+    MATCHED,
+
+    /**
+     * The field exists in the baseline heap dump but not the current.
+     */
+    DELETED
   };
 
   /**
-   * Return a DiffedFieldValue where there is both a current and baseline.
+   * Constructs a DiffedFieldValue where there are both current and baseline
+   * fields.
+   *
+   * @param current the current field
+   * @param baseline the baseline field
+   * @return the constructed DiffedFieldValue
    */
   public static DiffedFieldValue matched(FieldValue current, FieldValue baseline) {
     return new DiffedFieldValue(current.name,
@@ -47,14 +87,20 @@
   }
 
   /**
-   * Return a DiffedFieldValue where there is no baseline.
+   * Constructs a DiffedFieldValue where there is no baseline field.
+   *
+   * @param current the current field
+   * @return the constructed DiffedFieldValue
    */
   public static DiffedFieldValue added(FieldValue current) {
     return new DiffedFieldValue(current.name, current.type, current.value, null, Status.ADDED);
   }
 
   /**
-   * Return a DiffedFieldValue where there is no current.
+   * Constructs a DiffedFieldValue where there is no current field.
+   *
+   * @param baseline the baseline field
+   * @return the constructed DiffedFieldValue
    */
   public static DiffedFieldValue deleted(FieldValue baseline) {
     return new DiffedFieldValue(baseline.name, baseline.type, null, baseline.value, Status.DELETED);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Field.java b/tools/ahat/src/main/com/android/ahat/heapdump/Field.java
index dff4017..6494069 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Field.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Field.java
@@ -16,10 +16,26 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * A description of a field from a heap dump.
+ */
 public class Field {
+  /**
+   * The name of the field.
+   */
   public final String name;
+
+  /**
+   * The type of the field.
+   */
   public final Type type;
 
+  /**
+   * Constructs a Field instance.
+   *
+   * @param name name of the field
+   * @param type type of the field
+   */
   public Field(String name, Type type) {
     this.name = name;
     this.type = type;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/FieldValue.java b/tools/ahat/src/main/com/android/ahat/heapdump/FieldValue.java
index 20e6da7..70314da 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/FieldValue.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/FieldValue.java
@@ -16,11 +16,32 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * A description and value of a field from a heap dump.
+ */
 public class FieldValue {
+  /**
+   * The name of the field.
+   */
   public final String name;
+
+  /**
+   * The type of the field.
+   */
   public final Type type;
+
+  /**
+   * The value of the field.
+   */
   public final Value value;
 
+  /**
+   * Constructs an instance of FieldValue.
+   *
+   * @param name name of the field
+   * @param type type of the field
+   * @param value value of the field
+   */
   public FieldValue(String name, Type type, Value value) {
     this.name = name;
     this.type = type;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/HprofFormatException.java b/tools/ahat/src/main/com/android/ahat/heapdump/HprofFormatException.java
index 256a3b4..29ac9b0 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/HprofFormatException.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/HprofFormatException.java
@@ -16,6 +16,10 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * Exception thrown when the heap dump parser detects an improperly formatted
+ * heap dump file.
+ */
 public class HprofFormatException extends Exception {
   HprofFormatException(String msg) {
     super(msg);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java b/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java
index d7b1dd7..13be57d 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Parser.java
@@ -31,21 +31,43 @@
 import java.util.List;
 import java.util.Map;
 
+/**
+ * Provides methods for parsing heap dumps.
+ */
 public class Parser {
   private static final int ID_SIZE = 4;
 
+  private Parser() {
+  }
+
   /**
-   * Parse the given heap dump using the given proguard map for deobfuscation.
-   * We make the following assumptions about valid heap dumps:
-   * Class serial numbers, stack frames, and stack traces
-   * individually satisfy the following:
-   *  - all elements are defined before they are referenced.
-   *  - ids are densely packed in some range [a, b] where a is not
-   *    necessarily 0.
-   *  - there are not more than 2^31 elements defined.
-   * All classes are defined via a LOAD CLASS record before the first heap
-   * dump segment.
-   * The ID size used in the heap dump is 4 bytes.
+   * Parses a heap dump from a File.
+   * <p>
+   * The heap dump should be a heap dump in the J2SE HPROF format optionally
+   * with Android extensions and satisfying the following additional
+   * constraints:
+   * <ul>
+   * <li>
+   * Class serial numbers, stack frames, and stack traces individually satisfy
+   * the following:
+   * <ul>
+   *   <li> All elements are defined before they are referenced.
+   *   <li> Ids are densely packed in some range [a, b] where a is not necessarily 0.
+   *   <li> There are not more than 2^31 elements defined.
+   * </ul>
+   * <li> All classes are defined via a LOAD CLASS record before the first
+   * heap dump segment.
+   * <li> The ID size used in the heap dump is 4 bytes.
+   * </ul>
+   * <p>
+   * The given proguard map will be used to deobfuscate class names, field
+   * names, and stack traces in the heap dump.
+   *
+   * @param hprof the hprof file to parse
+   * @param map the proguard map for deobfuscation
+   * @return the parsed heap dump
+   * @throws IOException if the heap dump could not be read
+   * @throws HprofFormatException if the heap dump is not properly formatted
    */
   public static AhatSnapshot parseHeapDump(File hprof, ProguardMap map)
     throws IOException, HprofFormatException {
@@ -57,7 +79,33 @@
   }
 
   /**
-   * Parse a heap dump from a byte buffer.
+   * Parses a heap dump from a byte buffer.
+   * <p>
+   * The heap dump should be a heap dump in the J2SE HPROF format optionally
+   * with Android extensions and satisfying the following additional
+   * constraints:
+   * <ul>
+   * <li>
+   * Class serial numbers, stack frames, and stack traces individually satisfy
+   * the following:
+   * <ul>
+   *   <li> All elements are defined before they are referenced.
+   *   <li> Ids are densely packed in some range [a, b] where a is not necessarily 0.
+   *   <li> There are not more than 2^31 elements defined.
+   * </ul>
+   * <li> All classes are defined via a LOAD CLASS record before the first
+   * heap dump segment.
+   * <li> The ID size used in the heap dump is 4 bytes.
+   * </ul>
+   * <p>
+   * The given proguard map will be used to deobfuscate class names, field
+   * names, and stack traces in the heap dump.
+   *
+   * @param hprof the bytes of the hprof file to parse
+   * @param map the proguard map for deobfuscation
+   * @return the parsed heap dump
+   * @throws IOException if the heap dump could not be read
+   * @throws HprofFormatException if the heap dump is not properly formatted
    */
   public static AhatSnapshot parseHeapDump(ByteBuffer hprof, ProguardMap map)
     throws IOException, HprofFormatException {
@@ -853,7 +901,7 @@
     }
 
     public long getId() {
-      return mBuffer.getInt();
+      return mBuffer.getInt() & 0xFFFFFFFFL;
     }
 
     public boolean getBool() {
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/PathElement.java b/tools/ahat/src/main/com/android/ahat/heapdump/PathElement.java
index 196a246..5ce0b1e 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/PathElement.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/PathElement.java
@@ -16,11 +16,51 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * A single element along a reference path from a GC root to an instance in
+ * the heap dump.
+ * <p>
+ * For example, assuming object A is a root, a path to some object X might look
+ * like:
+ * <pre>
+ *   A.x --&gt; B.y --&gt; C.z --&gt; X
+ * </pre>
+ *
+ * A path element is a single node of that path, such as <code>B.y</code>.
+ * @see AhatInstance#getPathFromGcRoot
+ */
 public class PathElement implements Diffable<PathElement> {
+  /**
+   * The instance along the reference path that this PathElement is associated
+   * with.
+   */
   public final AhatInstance instance;
+
+  /**
+   * A human readable description of which field in <code>instance</code> is
+   * followed to reach the next element in the path.
+   * Some examples:
+   * <ul>
+   * <li> "mBlah" for a class instance
+   * <li> "[4]" for an array instance
+   * <li> "" for the last element of the path
+   * </ul>
+   */
   public final String field;
+
+  /**
+   * True if <code>instance</code> is a (not necessarily immediate) dominator
+   * of the final object in the path.
+   */
   public boolean isDominator;
 
+  /**
+   * Constructs a PathElement object.
+   * <code>isDominator</code> is set to false.
+   *
+   * @param instance the path element instance
+   * @param field the path element field
+   */
   public PathElement(AhatInstance instance, String field) {
     this.instance = instance;
     this.field = field;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/RootType.java b/tools/ahat/src/main/com/android/ahat/heapdump/RootType.java
index 734f889..99d85dc 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/RootType.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/RootType.java
@@ -16,20 +16,80 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * Enumeration representing object root types as defined in the binary heap
+ * dump format specification.
+ */
 public enum RootType {
+  /**
+   * There is a JNI Global Reference for the object in question.
+   */
   JNI_GLOBAL      (1 <<  0),
+
+  /**
+   * There is a JNI Local Reference for the object in question.
+   */
   JNI_LOCAL       (1 <<  1),
+
+  /**
+   * The object in question is a parameter or local variable of a running
+   * method.
+   */
   JAVA_FRAME      (1 <<  2),
+
+  /**
+   * The object in question is a parameter of a running JNI method.
+   */
   NATIVE_STACK    (1 <<  3),
+
+  /**
+   * The object is a class object that cannot be unloaded.
+   */
   STICKY_CLASS    (1 <<  4),
+
+  /**
+   * The object is referenced from an active thread block.
+   */
   THREAD_BLOCK    (1 <<  5),
+
+  /**
+   * The object's monitor is currently in use.
+   */
   MONITOR         (1 <<  6),
+
+  /**
+   * The object is a running thread.
+   */
   THREAD          (1 <<  7),
+
+  /**
+   * The object is an interned string.
+   */
   INTERNED_STRING (1 <<  8),
+
+  /**
+   * The object is being used by the debugger.
+   */
   DEBUGGER        (1 <<  9),
+
+  /**
+   * The object is being used by the VM internally.
+   */
   VM_INTERNAL     (1 << 10),
+
+  /**
+   * The object has no given reason for being considered a root.
+   */
   UNKNOWN         (1 << 11),
+
+  /**
+   * The object's monitor is currently in use from JNI.
+   */
   JNI_MONITOR     (1 << 12),
+
+  /**
+   * The object is waiting to be finalized.
+   */
   FINALIZING      (1 << 13);
 
   final int mask;
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Site.java b/tools/ahat/src/main/com/android/ahat/heapdump/Site.java
index 4978d52..72c0a4a 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Site.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Site.java
@@ -24,6 +24,10 @@
 import java.util.List;
 import java.util.Map;
 
+/**
+ * Used to collect information about objects allocated at a particular
+ * allocation site.
+ */
 public class Site implements Diffable<Site> {
   // The site that this site was directly called from.
   // mParent is null for the root site.
@@ -61,18 +65,39 @@
 
   private Site mBaseline;
 
+  /**
+   * Summary information about instances allocated at a particular allocation
+   * site that are instances of a particular class and allocated on a
+   * particular heap.
+   */
   public static class ObjectsInfo implements Diffable<ObjectsInfo> {
+    /**
+     * The heap that the summarized objects belong to.
+     */
     public AhatHeap heap;
-    public AhatClassObj classObj;   // May be null.
+
+    /**
+     * The class of the summarized objects.
+     */
+    public AhatClassObj classObj;   // May be null. Not sure why.
+
+    /**
+     * The number of instances included in the summary.
+     */
     public long numInstances;
+
+    /**
+     * The sum of the shallow size of each instance included in the summary.
+     */
     public Size numBytes;
+
     private ObjectsInfo baseline;
 
     /**
-     * Construct a new, empty objects info for the given heap and class
+     * Constructs a new, empty objects info for the given heap and class
      * combination.
      */
-    public ObjectsInfo(AhatHeap heap, AhatClassObj classObj) {
+    ObjectsInfo(AhatHeap heap, AhatClassObj classObj) {
       this.heap = heap;
       this.classObj = classObj;
       this.numInstances = 0;
@@ -82,12 +107,14 @@
 
     /**
      * Returns the name of the class this ObjectsInfo is associated with.
+     *
+     * @return the name of this object info's class
      */
     public String getClassName() {
       return classObj == null ? "???" : classObj.getName();
     }
 
-    public void setBaseline(ObjectsInfo baseline) {
+    void setBaseline(ObjectsInfo baseline) {
       this.baseline = baseline;
     }
 
@@ -121,11 +148,11 @@
   }
 
   /**
-   * Get a child site of this site.
-   * Returns the site at which the instance was allocated.
-   * @param frames - The list of frames in the stack trace, starting with the
-   *                 inner-most frame. May be null, in which case this site is
-   *                 returned.
+   * Gets a child site of this site.
+   * @param frames the list of frames in the stack trace, starting with the
+   *               inner-most frame. May be null, in which case this site is
+   *               returned.
+   * @return the child site
    */
   Site getSite(ProguardMap.Frame[] frames) {
     return frames == null ? this : getSite(this, frames);
@@ -211,22 +238,29 @@
     return id;
   }
 
-  // Get the size of a site for a specific heap.
+  /**
+   * Returns the size of all objects on the given heap allocated at this site.
+   * Includes objects belonging to <code>heap</code> allocated at this and
+   * child sites.
+   *
+   * @param heap the heap to query the size for
+   * @return the total shallow size of objects in this site
+   */
   public Size getSize(AhatHeap heap) {
     return mSizesByHeap[heap.getIndex()];
   }
 
   /**
-   * Collect the objects allocated under this site, optionally filtered by
+   * Collects the objects allocated under this site, optionally filtered by
    * heap name or class name. Includes objects allocated in children sites.
-   * @param heapName - The name of the heap the collected objects should
-   *                   belong to. This may be null to indicate objects of
-   *                   every heap should be collected.
-   * @param className - The name of the class the collected objects should
-   *                    belong to. This may be null to indicate objects of
-   *                    every class should be collected.
-   * @param objects - Out parameter. A collection of objects that all
-   *                  collected objects should be added to.
+   * @param heapName the name of the heap the collected objects should
+   *                 belong to. This may be null to indicate objects of
+   *                 every heap should be collected.
+   * @param className the name of the class the collected objects should
+   *                  belong to. This may be null to indicate objects of
+   *                  every class should be collected.
+   * @param objects out parameter. A collection of objects that all
+   *                collected objects should be added to.
    */
   public void getObjects(String heapName, String className, Collection<AhatInstance> objects) {
     for (AhatInstance inst : mObjects) {
@@ -263,11 +297,24 @@
     return info;
   }
 
+  /**
+   * Returns a summary breakdown of the objects allocated at this site.
+   * Objects are grouped by class and heap and summarized into a single
+   * {@link ObjectsInfo}. This method returns all the groups for this
+   * allocation site.
+   *
+   * @return all ObjectsInfo summaries for instances allocated at this site
+   */
   public List<ObjectsInfo> getObjectsInfos() {
     return mObjectsInfos;
   }
 
-  // Get the combined size of the site for all heaps.
+  /**
+   * Returns the combined size of the site for all heaps.
+   * Includes all objects allocated at this and child sites.
+   *
+   * @return total shallow size of objects in this site
+   */
   public Size getTotalSize() {
     Size total = Size.ZERO;
     for (Size size : mSizesByHeap) {
@@ -277,39 +324,70 @@
   }
 
   /**
-   * Return the site this site was called from.
+   * Returns the site this site was called from.
    * Returns null for the root site.
+   *
+   * @return the site this site was called from
    */
   public Site getParent() {
     return mParent;
   }
 
+  /**
+   * Returns the name of the method this allocation site belongs to.
+   * For example, "equals".
+   *
+   * @return the method name of the allocation site
+   */
   public String getMethodName() {
     return mMethodName;
   }
 
+  /**
+   * Returns the signature of the method this allocation site belongs to.
+   * For example, "(Ljava/lang/Object;)Z".
+   *
+   * @return the signature of the method the allocation site belongs to
+   */
   public String getSignature() {
     return mSignature;
   }
 
+  /**
+   * Returns the name of the Java file where this allocation site is found.
+   *
+   * @return the file the allocation site belongs to
+   */
   public String getFilename() {
     return mFilename;
   }
 
+  /**
+   * Returns the line number of the code in the source file that the
+   * allocation site refers to.
+   *
+   * @return the allocation site line number
+   */
   public int getLineNumber() {
     return mLineNumber;
   }
 
   /**
    * Returns the unique id of this site.
+   * This is an arbitrary unique id computed after processing the heap dump.
+   *
+   * @return the site id
    */
   public long getId() {
     return mId;
   }
 
   /**
-   * Find the child site with the given id.
+   * Returns the child site with the given id.
    * Returns null if no such site was found.
+   *
+   * @param id the id of the child site to find
+   * @return the found child site
    */
   public Site findSite(long id) {
     if (id == mId) {
@@ -341,6 +419,8 @@
 
   /**
    * Returns an unmodifiable list of this site's immediate children.
+   *
+   * @return this site's child sites
    */
   public List<Site> getChildren() {
     return Collections.unmodifiableList(mChildren);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Size.java b/tools/ahat/src/main/com/android/ahat/heapdump/Size.java
index 7c8db90..a4593e1 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Size.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Size.java
@@ -17,47 +17,76 @@
 package com.android.ahat.heapdump;
 
 /**
- * The Size class is used to represent how much space an instance takes up.
- *
+ * Used to represent how much space an instance takes up.
  * An abstraction is introduced rather than using a long directly in order to
  * more easily keep track of the different components of the size. For
  * example, some instances may have associated native, code, or graphics
  * sizes.
- *
+ * <p>
  * Size objects are immutable.
  */
 public class Size {
   private final long mJavaSize;
   private final long mRegisteredNativeSize;
 
+  /**
+   * An instance of Size with 0 for all categories.
+   */
   public static Size ZERO = new Size(0, 0);
 
+  /**
+   * Constructs a new instance of Size.
+   *
+   * @param javaSize number of bytes in the java category
+   * @param registeredNativeSize number of bytes in the registeredNativeSize
+   *        category
+   */
   public Size(long javaSize, long registeredNativeSize) {
     mJavaSize = javaSize;
     mRegisteredNativeSize = registeredNativeSize;
   }
 
+  /**
+   * Returns the sum of the size of all categories.
+   *
+   * @return the total size
+   */
   public long getSize() {
     return mJavaSize + mRegisteredNativeSize;
   }
 
+  /**
+   * Returns the size of the java category.
+   *
+   * @return the java category size
+   */
   public long getJavaSize() {
     return mJavaSize;
   }
 
+  /**
+   * Returns the size of the registered native category.
+   *
+   * @return the registered native category size
+   */
   public long getRegisteredNativeSize() {
     return mRegisteredNativeSize;
   }
 
   /**
-   * Returns true if all the fields of this size object are zero.
+   * Returns true if all categories of this size are zero.
+   *
+   * @return true if the size is zero
    */
   public boolean isZero() {
     return mJavaSize == 0 && mRegisteredNativeSize == 0;
   }
 
   /**
-   * Return a new Size object that is the sum of this size and the other.
+   * Returns a new Size object that is the sum of this size and the other.
+   *
+   * @param other the size to sum with this size
+   * @return the new size object
    */
   public Size plus(Size other) {
     if (isZero()) {
@@ -71,8 +100,11 @@
   }
 
   /**
-   * Return a new Size object that has 'size' more registered native size than
-   * this Size object.
+   * Returns a new Size object that has <code>size</code> more registered
+   * native size than this Size object.
+   *
+   * @param size the size to add to the registered native category
+   * @return the new size object
    */
   public Size plusRegisteredNativeSize(long size) {
     return new Size(mJavaSize, mRegisteredNativeSize + size);
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Sort.java b/tools/ahat/src/main/com/android/ahat/heapdump/Sort.java
index efe0d6b..a629b3c 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Sort.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Sort.java
@@ -25,14 +25,14 @@
 /**
  * Provides Comparators and helper functions for sorting Instances, Sites, and
  * other things.
- *
+ * <p>
  * Note: The Comparators defined here impose orderings that are inconsistent
  * with equals. They should not be used for element lookup or search. They
  * should only be used for showing elements to the user in different orders.
  */
 public class Sort {
   /**
-   * Compare sizes by their total size.
+   * Compares sizes by their total size.
    * This sorts sizes from smaller total size to larger total size.
    */
   public static final Comparator<Size> SIZE_BY_SIZE = new Comparator<Size>() {
@@ -43,7 +43,7 @@
   };
 
   /**
-   * Compare instances by their total retained size.
+   * Compares instances by their total retained size.
    * Different instances with the same total retained size are considered
    * equal for the purposes of comparison.
    * This sorts instances from larger retained size to smaller retained size.
@@ -57,12 +57,12 @@
   };
 
   /**
-   * Compare instances by their retained size for a given heap index.
+   * Compares instances by their retained size for a given heap index.
    * Different instances with the same total retained size are considered
    * equal for the purposes of comparison.
    * This sorts instances from larger retained size to smaller retained size.
    */
-  public static class InstanceByHeapRetainedSize implements Comparator<AhatInstance> {
+  private static class InstanceByHeapRetainedSize implements Comparator<AhatInstance> {
     private AhatHeap mHeap;
 
     public InstanceByHeapRetainedSize(AhatHeap heap) {
@@ -76,16 +76,28 @@
   }
 
   /**
-   * Compare objects based on a list of comparators, giving priority to the
+   * Compares objects based on a list of comparators, giving priority to the
    * earlier comparators in the list.
    */
-  public static class WithPriority<T> implements Comparator<T> {
+  private static class WithPriority<T> implements Comparator<T> {
     private List<Comparator<T>> mComparators;
 
+    /**
+     * Constructs a comparator giving sort priority to earlier comparators in
+     * the list.
+     *
+     * @param comparators the list of comparators to use for sorting
+     */
     public WithPriority(Comparator<T>... comparators) {
       mComparators = Arrays.asList(comparators);
     }
 
+    /**
+     * Constructs a comparator giving sort priority to earlier comparators in
+     * the list.
+     *
+     * @param comparators the list of comparators to use for sorting
+     */
     public WithPriority(List<Comparator<T>> comparators) {
       mComparators = comparators;
     }
@@ -101,6 +113,27 @@
     }
   }
 
+  /**
+   * Returns a comparator that gives sort priority to earlier comparators in
+   * the list.
+   *
+   * @param <T> the type of object being sorted
+   * @param comparators the list of comparators to use for sorting
+   * @return the composite comparator
+   */
+  public static <T> Comparator<T> withPriority(Comparator<T>... comparators) {
+    return new WithPriority<T>(comparators);
+  }
+
+  /**
+   * Returns a comparator that gives a default instance sort for the given
+   * snapshot.
+   * Objects are sorted by retained size, with priority given to the "app"
+   * heap if present.
+   *
+   * @param snapshot the snapshot to use the comparator with
+   * @return the default instance comparator
+   */
   public static Comparator<AhatInstance> defaultInstanceCompare(AhatSnapshot snapshot) {
     List<Comparator<AhatInstance>> comparators = new ArrayList<Comparator<AhatInstance>>();
 
@@ -116,14 +149,19 @@
   }
 
   /**
-   * Compare Sites by the size of objects allocated on a given heap.
+   * Compares Sites by the size of objects allocated on a given heap.
    * Different object infos with the same size on the given heap are
    * considered equal for the purposes of comparison.
    * This sorts sites from larger size to smaller size.
    */
-  public static class SiteByHeapSize implements Comparator<Site> {
+  private static class SiteByHeapSize implements Comparator<Site> {
     AhatHeap mHeap;
 
+    /**
+     * Constructs a SiteByHeapSize comparator.
+     *
+     * @param heap the heap to use when comparing sizes
+     */
     public SiteByHeapSize(AhatHeap heap) {
       mHeap = heap;
     }
@@ -135,7 +173,7 @@
   }
 
   /**
-   * Compare Sites by the total size of objects allocated.
+   * Compares Sites by the total size of objects allocated.
    * This sorts sites from larger size to smaller size.
    */
   public static final Comparator<Site> SITE_BY_TOTAL_SIZE = new Comparator<Site>() {
@@ -145,6 +183,14 @@
     }
   };
 
+  /**
+   * Compares Sites using a default comparison order.
+   * This sorts sites from larger size to smaller size, giving preference to
+   * sites with more allocation on the "app" heap, if present.
+   *
+   * @param snapshot the snapshot to use the comparator with
+   * @return the default site comparator
+   */
   public static Comparator<Site> defaultSiteCompare(AhatSnapshot snapshot) {
     List<Comparator<Site>> comparators = new ArrayList<Comparator<Site>>();
 
@@ -174,7 +220,7 @@
   };
 
   /**
-   * Compare Site.ObjectsInfo by heap name.
+   * Compares Site.ObjectsInfo by heap name.
    * Different object infos with the same heap name are considered equal for
    * the purposes of comparison.
    */
@@ -187,7 +233,7 @@
   };
 
   /**
-   * Compare Site.ObjectsInfo by class name.
+   * Compares Site.ObjectsInfo by class name.
    * Different object infos with the same class name are considered equal for
    * the purposes of comparison.
    */
@@ -202,7 +248,7 @@
   };
 
   /**
-   * Compare FieldValue by field name.
+   * Compares FieldValue by field name.
    */
   public static final Comparator<FieldValue> FIELD_VALUE_BY_NAME
     = new Comparator<FieldValue>() {
@@ -213,7 +259,7 @@
   };
 
   /**
-   * Compare FieldValue by type name.
+   * Compares FieldValue by type name.
    */
   public static final Comparator<FieldValue> FIELD_VALUE_BY_TYPE
     = new Comparator<FieldValue>() {
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Type.java b/tools/ahat/src/main/com/android/ahat/heapdump/Type.java
index 4024961..ff79864 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Type.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Type.java
@@ -16,18 +16,63 @@
 
 package com.android.ahat.heapdump;
 
+/**
+ * Enum corresponding to basic types from the binary heap dump format.
+ */
 public enum Type {
+  /**
+   * Type used for any Java object.
+   */
   OBJECT("Object", 4),
+
+  /**
+   * The primitive boolean type.
+   */
   BOOLEAN("boolean", 1),
+
+  /**
+   * The primitive char type.
+   */
   CHAR("char", 2),
+
+  /**
+   * The primitive float type.
+   */
   FLOAT("float", 4),
+
+  /**
+   * The primitive double type.
+   */
   DOUBLE("double", 8),
+
+  /**
+   * The primitive byte type.
+   */
   BYTE("byte", 1),
+
+  /**
+   * The primitive short type.
+   */
   SHORT("short", 2),
+
+  /**
+   * The primitive int type.
+   */
   INT("int", 4),
+
+  /**
+   * The primitive long type.
+   */
   LONG("long", 8);
 
+  /**
+   * The name of the type.
+   */
   public final String name;
+
+  /**
+   * The number of bytes taken up by values of this type in the Java heap.
+   */
   final int size;
 
   Type(String name, int size) {
diff --git a/tools/ahat/src/main/com/android/ahat/heapdump/Value.java b/tools/ahat/src/main/com/android/ahat/heapdump/Value.java
index eea4277..b219bf1 100644
--- a/tools/ahat/src/main/com/android/ahat/heapdump/Value.java
+++ b/tools/ahat/src/main/com/android/ahat/heapdump/Value.java
@@ -17,48 +17,107 @@
 package com.android.ahat.heapdump;
 
 /**
- * Value represents a field value in a heap dump. The field value is either a
- * subclass of AhatInstance or a primitive Java type.
+ * A Java instance or primitive value from a parsed heap dump.
+ * Note: To save memory, a null Value is used to represent a null Java
+ * instance from the heap dump.
  */
 public abstract class Value {
+  /**
+   * Constructs a Value for an AhatInstance.
+   * Note: returns null for null <code>value</code>.
+   *
+   * @param value the AhatInstance to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(AhatInstance value) {
     return value == null ? null : new InstanceValue(value);
   }
 
+  /**
+   * Constructs a Value for a boolean.
+   *
+   * @param value the boolean to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(boolean value) {
     return new BooleanValue(value);
   }
 
+  /**
+   * Constructs a Value for a char.
+   *
+   * @param value the char to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(char value) {
     return new CharValue(value);
   }
 
+  /**
+   * Constructs a Value for a float.
+   *
+   * @param value the float to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(float value) {
     return new FloatValue(value);
   }
 
+  /**
+   * Constructs a Value for a double.
+   *
+   * @param value the double to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(double value) {
     return new DoubleValue(value);
   }
 
+  /**
+   * Constructs a Value for a byte.
+   *
+   * @param value the byte to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(byte value) {
     return new ByteValue(value);
   }
 
+  /**
+   * Constructs a Value for a short.
+   *
+   * @param value the short to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(short value) {
     return new ShortValue(value);
   }
 
+  /**
+   * Constructs a Value for an int.
+   *
+   * @param value the int to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(int value) {
     return new IntValue(value);
   }
 
+  /**
+   * Constructs a Value for a long.
+   *
+   * @param value the long to make into a value
+   * @return the constructed value.
+   */
   public static Value pack(long value) {
     return new LongValue(value);
   }
 
   /**
-   * Return the type of the given value.
+   * Returns the type of the given value.
+   *
+   * @param value the value to get the type of
+   * @return the value's type
    */
   public static Type getType(Value value) {
     return value == null ? Type.OBJECT : value.getType();
@@ -70,62 +129,78 @@
   abstract Type getType();
 
   /**
-   * Returns true if the Value is an AhatInstance, as opposed to a Java
-   * primitive value.
+   * Returns true if the Value is an AhatInstance rather than a primitive
+   * value.
+   *
+   * @return true if the value is an AhatInstance
    */
   public boolean isAhatInstance() {
     return false;
   }
 
   /**
-   * Return the Value as an AhatInstance if it is one.
+   * Returns the Value as an AhatInstance if it is one.
    * Returns null if the Value represents a Java primitive value.
+   *
+   * @return the AhatInstance packed into this value
    */
   public AhatInstance asAhatInstance() {
     return null;
   }
 
   /**
-   * Returns true if the Value is an Integer.
+   * Returns true if the Value is an int.
+   *
+   * @return true if the value is an int.
    */
   public boolean isInteger() {
     return false;
   }
 
   /**
-   * Return the Value as an Integer if it is one.
-   * Returns null if the Value does not represent an Integer.
+   * Returns the Value as an int if it is one.
+   * Returns null if the Value does not represent an int.
+   *
+   * @return the int packed into this value
    */
   public Integer asInteger() {
     return null;
   }
 
   /**
-   * Returns true if the Value is an Long.
+   * Returns true if the Value is a long.
+   *
+   * @return true if the value is a long.
    */
   public boolean isLong() {
     return false;
   }
 
   /**
-   * Return the Value as an Long if it is one.
-   * Returns null if the Value does not represent an Long.
+   * Returns the Value as a long if it is one.
+   * Returns null if the Value does not represent a long.
+   *
+   * @return the long packed into this value
    */
   public Long asLong() {
     return null;
   }
 
   /**
-   * Return the Value as a Byte if it is one.
-   * Returns null if the Value does not represent a Byte.
+   * Returns the Value as a byte if it is one.
+   * Returns null if the Value does not represent a byte.
+   *
+   * @return the byte packed into this value
    */
   public Byte asByte() {
     return null;
   }
 
   /**
-   * Return the Value as a Char if it is one.
-   * Returns null if the Value does not represent a Char.
+   * Returns the Value as a char if it is one.
+   * Returns null if the Value does not represent a char.
+   *
+   * @return the char packed into this value
    */
   public Character asChar() {
     return null;
@@ -134,10 +209,18 @@
   @Override
   public abstract String toString();
 
-  public Value getBaseline() {
+  private Value getBaseline() {
     return this;
   }
 
+  /**
+   * Returns the baseline of the given value for the purposes of diff.
+   * This method can be used to handle the case when the Value is null.
+   *
+   * @param value the value to get the baseline of
+   * @return the baseline of the value
+   * @see Diffable#getBaseline
+   */
   public static Value getBaseline(Value value) {
     return value == null ? null : value.getBaseline();
   }
@@ -313,7 +396,6 @@
       return mInstance.toString();
     }
 
-    @Override
     public Value getBaseline() {
       return InstanceValue.pack(mInstance.getBaseline());
     }
diff --git a/tools/ahat/src/main/com/android/ahat/proguard/ProguardMap.java b/tools/ahat/src/main/com/android/ahat/proguard/ProguardMap.java
index 32bb209..79a737c 100644
--- a/tools/ahat/src/main/com/android/ahat/proguard/ProguardMap.java
+++ b/tools/ahat/src/main/com/android/ahat/proguard/ProguardMap.java
@@ -26,7 +26,10 @@
 import java.util.HashMap;
 import java.util.Map;
 
-// Class used to deobfuscate classes, fields, and stack frames.
+/**
+ * A representation of a proguard mapping for deobfuscating class names,
+ * field names, and stack frames.
+ */
 public class ProguardMap {
 
   private static final String ARRAY_SYMBOL = "[]";
@@ -98,6 +101,10 @@
   private Map<String, ClassData> mClassesFromClearName = new HashMap<String, ClassData>();
   private Map<String, ClassData> mClassesFromObfuscatedName = new HashMap<String, ClassData>();
 
+  /**
+   * Information associated with a stack frame that identifies a particular
+   * line of source code.
+   */
   public static class Frame {
     Frame(String method, String signature, String filename, int line) {
       this.method = method;
@@ -106,9 +113,28 @@
       this.line = line;
     }
 
+    /**
+     * The name of the method the stack frame belongs to.
+     * For example, "equals".
+     */
     public final String method;
+
+    /**
+     * The signature of the method the stack frame belongs to.
+     * For example, "(Ljava/lang/Object;)Z".
+     */
     public final String signature;
+
+    /**
+     * The name of the file containing the line of source that the stack
+     * frame refers to.
+     */
     public final String filename;
+
+    /**
+     * The line number of the code in the source file that the stack frame
+     * refers to.
+     */
     public final int line;
   }
 
@@ -116,13 +142,44 @@
     throw new ParseException(msg, 0);
   }
 
-  // Read in proguard mapping information from the given file.
+  /**
+   * Creates a new empty proguard mapping.
+   * The {@link #readFromFile readFromFile} and
+   * {@link #readFromReader readFromReader} methods can be used to populate
+   * the proguard mapping with proguard mapping information.
+   */
+  public ProguardMap() {
+  }
+
+  /**
+   * Adds the proguard mapping information in <code>mapFile</code> to this
+   * proguard mapping.
+   * The <code>mapFile</code> should be a proguard mapping file generated with
+   * the <code>-printmapping</code> option when proguard was run.
+   *
+   * @param mapFile the name of a file with proguard mapping information
+   * @throws FileNotFoundException If the <code>mapFile</code> could not be
+   *                               found
+   * @throws IOException If an input exception occurred.
+   * @throws ParseException If the <code>mapFile</code> is not a properly
+   *                        formatted proguard mapping file.
+   */
   public void readFromFile(File mapFile)
     throws FileNotFoundException, IOException, ParseException {
     readFromReader(new FileReader(mapFile));
   }
 
-  // Read in proguard mapping information from the given Reader.
+  /**
+   * Adds the proguard mapping information read from <code>mapReader</code> to
+   * this proguard mapping.
+   * <code>mapReader</code> should be a Reader of a proguard mapping file
+   * generated with the <code>-printmapping</code> option when proguard was run.
+   *
+   * @param mapReader a Reader for reading the proguard mapping information
+   * @throws IOException If an input exception occurred.
+   * @throws ParseException If the <code>mapReader</code> input is not a properly
+   *                        formatted proguard mapping file.
+   */
   public void readFromReader(Reader mapReader) throws IOException, ParseException {
     BufferedReader reader = new BufferedReader(mapReader);
     String line = reader.readLine();
@@ -207,8 +264,15 @@
     reader.close();
   }
 
-  // Returns the deobfuscated version of the given class name. If no
-  // deobfuscated version is known, the original string is returned.
+  /**
+   * Returns the deobfuscated version of the given obfuscated class name.
+   * If this proguard mapping does not include information about how to
+   * deobfuscate the obfuscated class name, the obfuscated class name
+   * is returned.
+   *
+   * @param obfuscatedClassName the obfuscated class name to deobfuscate
+   * @return the deobfuscated class name.
+   */
   public String getClassName(String obfuscatedClassName) {
     // Class names for arrays may have trailing [] that need to be
     // stripped before doing the lookup.
@@ -224,9 +288,17 @@
     return clearBaseName + arraySuffix;
   }
 
-  // Returns the deobfuscated version of the given field name for the given
-  // (clear) class name. If no deobfuscated version is known, the original
-  // string is returned.
+  /**
+   * Returns the deobfuscated version of the obfuscated field name for the
+   * given deobfuscated class name.
+   * If this proguard mapping does not include information about how to
+   * deobfuscate the obfuscated field name, the obfuscated field name is
+   * returned.
+   *
+   * @param clearClass the deobfuscated name of the class the field belongs to
+   * @param obfuscatedField the obfuscated field name to deobfuscate
+   * @return the deobfuscated field name.
+   */
   public String getFieldName(String clearClass, String obfuscatedField) {
     ClassData classData = mClassesFromClearName.get(clearClass);
     if (classData == null) {
@@ -235,8 +307,21 @@
     return classData.getField(obfuscatedField);
   }
 
-  // Returns the deobfuscated frame for the given obfuscated frame and (clear)
-  // class name. As much of the frame is deobfuscated as can be.
+  /**
+   * Returns the deobfuscated version of the obfuscated stack frame
+   * information for the given deobfuscated class name.
+   * If this proguard mapping does not include information about how to
+   * deobfuscate the obfuscated stack frame information, the obfuscated stack
+   * frame information is returned.
+   *
+   * @param clearClassName the deobfuscated name of the class the stack frame's
+   * method belongs to
+   * @param obfuscatedMethodName the obfuscated method name to deobfuscate
+   * @param obfuscatedSignature the obfuscated method signature to deobfuscate
+   * @param obfuscatedFilename the obfuscated file name to deobfuscate.
+   * @param obfuscatedLine the obfuscated line number to deobfuscate.
+   * @return the deobfuscated stack frame information.
+   */
   public Frame getFrame(String clearClassName, String obfuscatedMethodName,
       String obfuscatedSignature, String obfuscatedFilename, int obfuscatedLine) {
     String clearSignature = getSignature(obfuscatedSignature);
diff --git a/tools/checker/README b/tools/checker/README
index 65f5bd2..b8dd803 100644
--- a/tools/checker/README
+++ b/tools/checker/README
@@ -76,3 +76,10 @@
   /// CHECK-START-ARM64: int MyClass.MyMethod() constant_folding (after)
   /// CHECK:         <<ID:i\d+>>  IntConstant {{11|22}}
   /// CHECK:                      Return [<<ID>>]
+
+For convenience, several architectures can be specified as a set after the
+'CHECK-START' keyword. Any listed architecture will match in that case,
+which avoids repeating the check lines when some, but not all, architectures
+match. An example line looks like:
+
+  /// CHECK-START-{MIPS,ARM,ARM64}: int MyClass.MyMethod() constant_folding (after)
diff --git a/tools/checker/checker.py b/tools/checker/checker.py
index 2e9faba..65b01a7 100755
--- a/tools/checker/checker.py
+++ b/tools/checker/checker.py
@@ -90,7 +90,8 @@
   for checkFilename in FindCheckerFiles(checkPath):
     checkerFile = ParseCheckerStream(os.path.basename(checkFilename),
                                      checkPrefix,
-                                     open(checkFilename, "r"))
+                                     open(checkFilename, "r"),
+                                     targetArch)
     MatchFiles(checkerFile, c1File, targetArch, debuggableMode)
 
 
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
index f199a50..7a5a4c8 100644
--- a/tools/checker/file_format/checker/parser.py
+++ b/tools/checker/file_format/checker/parser.py
@@ -44,7 +44,33 @@
   else:
     return None
 
-def __processLine(line, lineNo, prefix, fileName):
+def __preprocessLineForStart(prefix, line, targetArch):
+  """ This function modifies a CHECK-START-{x,y,z} into a matching
+      CHECK-START-y line for matching targetArch y. If no matching
+      architecture is found, CHECK-START-x is returned arbitrarily
+      to ensure all following check lines are put into a test that
+      is skipped. Any other line is left unmodified.
+  """
+  if targetArch is not None:
+    if prefix in line:
+      # Find { } on the line and assume that defines the set.
+      s = line.find('{')
+      e = line.find('}')
+      if 0 < s and s < e:
+        archs = line[s+1:e].split(',')
+        # First verify that every arch is valid. Return the
+        # full line on failure to prompt error back to user.
+        for arch in archs:
+          if not arch in archs_list:
+            return line
+        # Now accept matching arch or arbitrarily return first.
+        if targetArch in archs:
+          return line[:s] + targetArch + line[e + 1:]
+        else:
+          return line[:s] + archs[0] + line[e + 1:]
+  return line
+
+def __processLine(line, lineNo, prefix, fileName, targetArch):
   """ This function is invoked on each line of the check file and returns a triplet
       which instructs the parser how the line should be handled. If the line is
       to be included in the current check group, it is returned in the first
@@ -56,10 +82,11 @@
     return None, None, None
 
   # Lines beginning with 'CHECK-START' start a new test case.
-  # We currently only consider the architecture suffix in "CHECK-START" lines.
+  # We currently only consider the architecture suffix(es) in "CHECK-START" lines.
   for debuggable in [True, False]:
+    sline = __preprocessLineForStart(prefix + "-START", line, targetArch)
     for arch in [None] + archs_list:
-      startLine = __extractLine(prefix + "-START", line, arch, debuggable)
+      startLine = __extractLine(prefix + "-START", sline, arch, debuggable)
       if startLine is not None:
         return None, startLine, (arch, debuggable)
 
@@ -164,9 +191,9 @@
         assertion.addExpression(TestExpression.createPatternFromPlainText(text))
   return assertion
 
-def ParseCheckerStream(fileName, prefix, stream):
+def ParseCheckerStream(fileName, prefix, stream, targetArch = None):
   checkerFile = CheckerFile(fileName)
-  fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix, fileName)
+  fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix, fileName, targetArch)
   fnLineOutsideChunk = lambda line, lineNo: \
       Logger.fail("Checker line not inside a group", fileName, lineNo)
   for caseName, caseLines, startLineNo, testData in \
diff --git a/tools/libjdwp_oj_art_failures.txt b/tools/external_oj_libjdwp_art_failures.txt
similarity index 92%
rename from tools/libjdwp_oj_art_failures.txt
rename to tools/external_oj_libjdwp_art_failures.txt
index e1cc831..1178af4 100644
--- a/tools/libjdwp_oj_art_failures.txt
+++ b/tools/external_oj_libjdwp_art_failures.txt
@@ -1,6 +1,9 @@
 /*
  * This file contains expectations for ART's buildbot. The purpose of this file is
  * to temporarily list failing tests and not break the bots.
+ *
+ * This file contains the expectations for the 'libjdwp-aot' and 'libjdwp-jit'
+ * test groups on the chromium buildbot.
  */
 [
 {
@@ -69,15 +72,16 @@
   name: "org.apache.harmony.jpda.tests.jdwp.ObjectReference.IsCollectedTest#testIsCollected001"
 },
 {
-  description: "Test for ddms extensions that are not yet implemented",
-  result: EXEC_FAILED,
-  bug: 69169846,
-  name: "org.apache.harmony.jpda.tests.jdwp.DDM.DDMTest#testChunk001"
-},
-{
   description: "Test crashes",
   result: EXEC_FAILED,
   bug: 69591477,
   name: "org.apache.harmony.jpda.tests.jdwp.VirtualMachine.ExitTest#testExit001"
+},
+{
+  description: "Test times out on fugu-debug",
+  result: EXEC_FAILED,
+  bug: 70459916,
+  names: [ "org.apache.harmony.jpda.tests.jdwp.VMDebug.VMDebugTest#testVMDebug",
+           "org.apache.harmony.jpda.tests.jdwp.VMDebug.VMDebugTest002#testVMDebug" ]
 }
 ]
diff --git a/tools/jfuzz/README.md b/tools/jfuzz/README.md
index 10d175b..bee2396 100644
--- a/tools/jfuzz/README.md
+++ b/tools/jfuzz/README.md
@@ -28,6 +28,8 @@
          (higher values yield deeper nested conditionals)
     -n : defines a fuzzing nest for for/while/do-while loops
          (higher values yield deeper nested loops)
+    -t : defines a fuzzing nest for try-catch-finally blocks
+         (higher values yield deeper nested try-catch-finally blocks)
     -v : prints version number and exits
     -h : prints help and exits
 
@@ -48,7 +50,7 @@
                           [--report_script=SCRIPT]
                           [--jfuzz_arg=ARG]
                           [--true_divergence]
-                          [--use_dx]
+                          [--dexer=DEXER]
 
 where
 
@@ -64,7 +66,7 @@
     --report_script   : path to script called for each divergence
     --jfuzz_arg       : argument for jfuzz
     --true_divergence : don't bisect timeout divergences
-    --use_dx          : use dx (rather than jack)
+    --dexer=DEXER     : use either dx, d8, or jack to obtain dex files
 
 How to start JFuzz nightly testing
 ==================================
@@ -85,14 +87,14 @@
                           [--num_tests=NUM_TESTS]
                           [--num_inputs=NUM_INPUTS]
                           [--device=DEVICE]
-                          [--use_dx]
+                          [--dexer=DEXER]
 
 where
 
-    --num_tests : number of tests to run (10000 by default)
-    --num_inputs: number of JFuzz programs to generate
-    --device    : target device serial number (passed to adb -s)
-    --use_dx    : use dx (rather than jack)
+    --num_tests   : number of tests to run (10000 by default)
+    --num_inputs  : number of JFuzz programs to generate
+    --device      : target device serial number (passed to adb -s)
+    --dexer=DEXER : use either dx, d8, or jack to obtain dex files
 
 Background
 ==========
diff --git a/tools/jfuzz/jfuzz.cc b/tools/jfuzz/jfuzz.cc
index 7990c6c..a6034c8 100644
--- a/tools/jfuzz/jfuzz.cc
+++ b/tools/jfuzz/jfuzz.cc
@@ -31,8 +31,6 @@
  * Operators.
  */
 
-#define EMIT(x) fputs((x)[random0(sizeof(x)/sizeof(const char*))], out_);
-
 static constexpr const char* kIncDecOps[]   = { "++", "--" };
 static constexpr const char* kIntUnaryOps[] = { "+", "-", "~" };
 static constexpr const char* kFpUnaryOps[]  = { "+", "-" };
@@ -51,11 +49,21 @@
 static constexpr const char* kRelOps[]     = { "==", "!=", ">", ">=", "<", "<=" };
 
 /*
+ * Exceptions.
+ */
+static const char* kExceptionTypes[] = {
+  "IllegalStateException",
+  "NullPointerException",
+  "IllegalArgumentException",
+  "ArrayIndexOutOfBoundsException"
+};
+
+/*
  * Version of JFuzz. Increase this each time changes are made to the program
  * to preserve the property that a given version of JFuzz yields the same
  * fuzzed program for a deterministic random seed.
  */
-const char* VERSION = "1.4";
+const char* VERSION = "1.5";
 
 /*
  * Maximum number of array dimensions, together with corresponding maximum size
@@ -64,6 +72,14 @@
 static const uint32_t kMaxDim = 10;
 static const uint32_t kMaxDimSize[kMaxDim + 1] = { 0, 1000, 32, 10, 6, 4, 3, 3, 2, 2, 2 };
 
+/*
+ * Utility function to return the number of elements in an array.
+ */
+template <typename T, uint32_t N>
+constexpr uint32_t countof(T const (&)[N]) {
+  return N;
+}
+
 /**
  * A class that generates a random program that compiles correctly. The program
  * is generated using rules that generate various programming constructs. Each rule
@@ -78,7 +94,8 @@
         uint32_t expr_depth,
         uint32_t stmt_length,
         uint32_t if_nest,
-        uint32_t loop_nest)
+        uint32_t loop_nest,
+        uint32_t try_nest)
       : out_(out),
         fuzz_random_engine_(seed),
         fuzz_seed_(seed),
@@ -86,6 +103,7 @@
         fuzz_stmt_length_(stmt_length),
         fuzz_if_nest_(if_nest),
         fuzz_loop_nest_(loop_nest),
+        fuzz_try_nest_(try_nest),
         return_type_(randomType()),
         array_type_(randomType()),
         array_dim_(random1(kMaxDim)),
@@ -97,6 +115,7 @@
         loop_nest_(0),
         switch_nest_(0),
         do_nest_(0),
+        try_nest_(0),
         boolean_local_(0),
         int_local_(0),
         long_local_(0),
@@ -168,6 +187,12 @@
     }
   }
 
+  // Emits a random string selected from an array of operator strings.
+  template <std::uint32_t N>
+  inline void emitOneOf(const char* const (&ops)[N]) {
+    fputs(ops[random0(N)], out_);
+  }
+
   //
   // Expressions.
   //
@@ -177,9 +202,9 @@
     if (tp == kBoolean) {
       fputc('!', out_);
     } else if (isInteger(tp)) {
-      EMIT(kIntUnaryOps);
+      emitOneOf(kIntUnaryOps);
     } else {  // isFP(tp)
-      EMIT(kFpUnaryOps);
+      emitOneOf(kFpUnaryOps);
     }
   }
 
@@ -188,38 +213,38 @@
     if (tp == kBoolean) {
       // Not applicable, just leave "as is".
     } else {  // isInteger(tp) || isFP(tp)
-      EMIT(kIncDecOps);
+      emitOneOf(kIncDecOps);
     }
   }
 
   // Emit a binary operator (same type in-out).
   void emitBinaryOp(Type tp) {
     if (tp == kBoolean) {
-      EMIT(kBoolBinOps);
+      emitOneOf(kBoolBinOps);
     } else if (isInteger(tp)) {
-      EMIT(kIntBinOps);
+      emitOneOf(kIntBinOps);
     } else {  // isFP(tp)
-      EMIT(kFpBinOps);
+      emitOneOf(kFpBinOps);
     }
   }
 
   // Emit an assignment operator (same type in-out).
   void emitAssignmentOp(Type tp) {
     if (tp == kBoolean) {
-      EMIT(kBoolAssignOps);
+      emitOneOf(kBoolAssignOps);
     } else if (isInteger(tp)) {
-      EMIT(kIntAssignOps);
+      emitOneOf(kIntAssignOps);
     } else {  // isFP(tp)
-      EMIT(kFpAssignOps);
+      emitOneOf(kFpAssignOps);
     }
   }
 
   // Emit a relational operator (one type in, boolean out).
   void emitRelationalOp(Type tp) {
     if (tp == kBoolean) {
-      EMIT(kBoolRelOps);
+      emitOneOf(kBoolRelOps);
     } else {  // isInteger(tp) || isFP(tp)
-      EMIT(kRelOps);
+      emitOneOf(kRelOps);
     }
   }
 
@@ -808,7 +833,7 @@
     fputs("{\n", out_);
     indentation_ += 2;
     emitIndentation();
-    fprintf(out_, "int i%u = %d;", loop_nest_, isWhile ? -1 : 0);
+    fprintf(out_, "int i%u = %d;\n", loop_nest_, isWhile ? -1 : 0);
     emitIndentation();
     if (isWhile) {
       fprintf(out_, "while (++i%u < ", loop_nest_);
@@ -871,6 +896,73 @@
     return mayFollowTrue || mayFollowFalse;
   }
 
+  bool emitTry() {
+    fputs("try {\n", out_);
+    indentation_ += 2;
+    bool mayFollow = emitStatementList();
+    indentation_ -= 2;
+    emitIndentation();
+    fputc('}', out_);
+    return mayFollow;
+  }
+
+  bool emitCatch() {
+    uint32_t count = random1(countof(kExceptionTypes));
+    bool mayFollow = false;
+    for (uint32_t i = 0; i < count; ++i) {
+      fprintf(out_, " catch (%s ex%u_%u) {\n", kExceptionTypes[i], try_nest_, i);
+      indentation_ += 2;
+      mayFollow |= emitStatementList();
+      indentation_ -= 2;
+      emitIndentation();
+      fputc('}', out_);
+    }
+    return mayFollow;
+  }
+
+  bool emitFinally() {
+    fputs(" finally {\n", out_);
+    indentation_ += 2;
+    bool mayFollow = emitStatementList();
+    indentation_ -= 2;
+    emitIndentation();
+    fputc('}', out_);
+    return mayFollow;
+  }
+
+  // Emit a try-catch-finally block.
+  bool emitTryCatchFinally() {
+    // Apply a hard limit on the nesting depth of try-catch-finally
+    // blocks. This is for javac, which fails if blocks within
+    // try-catch-finally are too large (much less than you'd expect).
+    if (try_nest_ > fuzz_try_nest_) {
+      return emitAssignment();  // fall back
+    }
+
+    ++try_nest_;  // Entering try-catch-finally
+
+    bool mayFollow = emitTry();
+    switch (random0(3)) {
+      case 0:  // try..catch
+        mayFollow |= emitCatch();
+        break;
+      case 1:  // try..finally
+        mayFollow &= emitFinally();
+        break;
+      case 2:  // try..catch..finally
+        // When determining whether code may follow, we observe that a
+        // finally block always follows after try and catch
+        // block. Code may only follow if the finally block permits
+        // and either the try or catch block allows code to follow.
+        mayFollow = (mayFollow | emitCatch()) & emitFinally();
+        break;
+    }
+    fputc('\n', out_);
+
+    --try_nest_;  // Leaving try-catch-finally
+    return mayFollow;
+  }
+
   // Emit a switch statement.
   bool emitSwitch() {
     // Continuing if nest becomes less likely as the depth grows.
@@ -915,6 +1007,11 @@
     return mayFollow;
   }
 
+  bool emitNopCall() {
+    fputs("nop();\n", out_);
+    return true;
+  }
+
   // Emit an assignment statement.
   bool emitAssignment() {
     Type tp = randomType();
@@ -930,16 +1027,18 @@
   // Emit a single statement. Returns true if statements may follow.
   bool emitStatement() {
     switch (random1(16)) {  // favor assignments
-      case 1:  return emitReturn(false); break;
-      case 2:  return emitContinue();    break;
-      case 3:  return emitBreak();       break;
-      case 4:  return emitScope();       break;
-      case 5:  return emitArrayInit();   break;
-      case 6:  return emitForLoop();     break;
-      case 7:  return emitDoLoop();      break;
-      case 8:  return emitIfStmt();      break;
-      case 9:  return emitSwitch();      break;
-      default: return emitAssignment();  break;
+      case 1:  return emitReturn(false);     break;
+      case 2:  return emitContinue();        break;
+      case 3:  return emitBreak();           break;
+      case 4:  return emitScope();           break;
+      case 5:  return emitArrayInit();       break;
+      case 6:  return emitForLoop();         break;
+      case 7:  return emitDoLoop();          break;
+      case 8:  return emitIfStmt();          break;
+      case 9:  return emitSwitch();          break;
+      case 10: return emitTryCatchFinally(); break;
+      case 11: return emitNopCall();         break;
+      default: return emitAssignment();      break;
     }
   }
 
@@ -1109,6 +1208,11 @@
     fputs("  }\n", out_);
   }
 
+  // Emit a static void method.
+  void emitStaticNopMethod() {
+    fputs("  public static void nop() {}\n", out_);
+  }
+
   // Emit program header. Emit command line options in the comments.
   void emitHeader() {
     fputs("\n/**\n * AOSP JFuzz Tester.\n", out_);
@@ -1133,6 +1237,7 @@
     emitArrayDecl();
     emitTestConstructor();
     emitTestMethod();
+    emitStaticNopMethod();
     emitMainMethod();
     indentation_ -= 2;
     fputs("}\n\n", out_);
@@ -1167,6 +1272,7 @@
   const uint32_t fuzz_stmt_length_;
   const uint32_t fuzz_if_nest_;
   const uint32_t fuzz_loop_nest_;
+  const uint32_t fuzz_try_nest_;
 
   // Return and array setup.
   const Type return_type_;
@@ -1182,6 +1288,7 @@
   uint32_t loop_nest_;
   uint32_t switch_nest_;
   uint32_t do_nest_;
+  uint32_t try_nest_;
   uint32_t boolean_local_;
   uint32_t int_local_;
   uint32_t long_local_;
@@ -1203,6 +1310,7 @@
   uint32_t stmt_length = 8;
   uint32_t if_nest = 2;
   uint32_t loop_nest = 3;
+  uint32_t try_nest = 2;
 
   // Parse options.
   while (1) {
@@ -1226,6 +1334,9 @@
       case 'n':
         loop_nest = strtoul(optarg, nullptr, 0);
         break;
+      case 't':
+        try_nest = strtoul(optarg, nullptr, 0);
+        break;
       case 'v':
         fprintf(stderr, "jfuzz version %s\n", VERSION);
         return 0;
@@ -1234,7 +1345,7 @@
         fprintf(stderr,
                 "usage: %s [-s seed] "
                 "[-d expr-depth] [-l stmt-length] "
-                "[-i if-nest] [-n loop-nest] [-v] [-h]\n",
+                "[-i if-nest] [-n loop-nest] [-t try-nest] [-v] [-h]\n",
                 argv[0]);
         return 1;
     }
@@ -1244,7 +1355,7 @@
   srand(seed);
 
   // Generate fuzzed program.
-  JFuzz fuzz(stdout, seed, expr_depth, stmt_length, if_nest, loop_nest);
+  JFuzz fuzz(stdout, seed, expr_depth, stmt_length, if_nest, loop_nest, try_nest);
   fuzz.emitProgram();
   return 0;
 }
diff --git a/tools/jfuzz/run_dex_fuzz_test.py b/tools/jfuzz/run_dex_fuzz_test.py
index ca0aec0..fdff9c0 100755
--- a/tools/jfuzz/run_dex_fuzz_test.py
+++ b/tools/jfuzz/run_dex_fuzz_test.py
@@ -41,14 +41,14 @@
 class DexFuzzTester(object):
   """Tester that feeds JFuzz programs into DexFuzz testing."""
 
-  def  __init__(self, num_tests, num_inputs, device, use_dx):
+  def  __init__(self, num_tests, num_inputs, device, dexer):
     """Constructor for the tester.
 
     Args:
       num_tests: int, number of tests to run
       num_inputs: int, number of JFuzz programs to generate
       device: string, target device serial number (or None)
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
     """
     self._num_tests = num_tests
     self._num_inputs = num_inputs
@@ -58,7 +58,7 @@
     self._dexfuzz_dir = None
     self._inputs_dir = None
     self._dexfuzz_env = None
-    self._use_dx = use_dx
+    self._dexer = dexer
 
   def __enter__(self):
     """On entry, enters new temp directory after saving current directory.
@@ -109,13 +109,14 @@
     Raises:
       FatalError: error when compilation fails
     """
-    if self._use_dx:
+    if self._dexer == 'dx' or self._dexer == 'd8':
       if RunCommand(['javac', 'Test.java'],
                     out=None, err='jerr.txt', timeout=30) != RetCode.SUCCESS:
         print('Unexpected error while running javac')
         raise FatalError('Unexpected error while running javac')
       cfiles = glob('*.class')
-      if RunCommand(['dx', '--dex', '--output=classes.dex'] + cfiles,
+      dx = 'dx' if self._dexer == 'dx' else 'd8-compat-dx'
+      if RunCommand([dx, '--dex', '--output=classes.dex'] + cfiles,
                     out=None, err='dxerr.txt', timeout=30) != RetCode.SUCCESS:
         print('Unexpected error while running dx')
         raise FatalError('Unexpected error while running dx')
@@ -124,7 +125,8 @@
         os.unlink(cfile)
       os.unlink('jerr.txt')
       os.unlink('dxerr.txt')
-    else:
+
+    elif self._dexer == 'jack':
       jack_args = ['-cp', GetJackClassPath(), '--output-dex', '.', 'Test.java']
       if RunCommand(['jack'] + jack_args, out=None, err='jackerr.txt',
                     timeout=30) != RetCode.SUCCESS:
@@ -132,6 +134,8 @@
         raise FatalError('Unexpected error while running Jack')
       # Cleanup on success (nothing to see).
       os.unlink('jackerr.txt')
+    else:
+      raise FatalError('Unknown dexer: ' + self._dexer)
 
   def GenerateJFuzzPrograms(self):
     """Generates JFuzz programs.
@@ -175,16 +179,16 @@
 def main():
   # Handle arguments.
   parser = argparse.ArgumentParser()
-  parser.add_argument('--num_tests', default=1000,
-                      type=int, help='number of tests to run')
-  parser.add_argument('--num_inputs', default=10,
-                      type=int, help='number of JFuzz program to generate')
-  parser.add_argument('--use_dx', default=False, action='store_true',
-                      help='use dx (rather than jack)')
+  parser.add_argument('--num_tests', default=1000, type=int,
+                      help='number of tests to run (default: 1000)')
+  parser.add_argument('--num_inputs', default=10, type=int,
+                      help='number of JFuzz program to generate (default: 10)')
+  parser.add_argument('--dexer', default='dx', type=str,
+                      help='defines dexer as dx, d8, or jack (default: dx)')
   parser.add_argument('--device', help='target device serial number')
   args = parser.parse_args()
   # Run the DexFuzz tester.
-  with DexFuzzTester(args.num_tests, args.num_inputs, args.device, args.use_dx) as fuzzer:
+  with DexFuzzTester(args.num_tests, args.num_inputs, args.device, args.dexer) as fuzzer:
     fuzzer.Run()
 
 if __name__ == '__main__':
diff --git a/tools/jfuzz/run_jfuzz_test.py b/tools/jfuzz/run_jfuzz_test.py
index dac1c79..b889940 100755
--- a/tools/jfuzz/run_jfuzz_test.py
+++ b/tools/jfuzz/run_jfuzz_test.py
@@ -43,11 +43,11 @@
 BISECTABLE_RET_CODES = (RetCode.SUCCESS, RetCode.ERROR, RetCode.TIMEOUT)
 
 
-def GetExecutionModeRunner(use_dx, device, mode):
+def GetExecutionModeRunner(dexer, device, mode):
   """Returns a runner for the given execution mode.
 
   Args:
-    use_dx: boolean, if True use dx rather than jack
+    dexer: string, defines dexer
     device: string, target device serial number (or None)
     mode: string, execution mode
   Returns:
@@ -58,13 +58,13 @@
   if mode == 'ri':
     return TestRunnerRIOnHost()
   if mode == 'hint':
-    return TestRunnerArtIntOnHost(use_dx)
+    return TestRunnerArtIntOnHost(dexer)
   if mode == 'hopt':
-    return TestRunnerArtOptOnHost(use_dx)
+    return TestRunnerArtOptOnHost(dexer)
   if mode == 'tint':
-    return TestRunnerArtIntOnTarget(use_dx, device)
+    return TestRunnerArtIntOnTarget(dexer, device)
   if mode == 'topt':
-    return TestRunnerArtOptOnTarget(use_dx, device)
+    return TestRunnerArtOptOnTarget(dexer, device)
   raise FatalError('Unknown execution mode')
 
 
@@ -117,27 +117,30 @@
 class TestRunnerWithHostCompilation(TestRunner):
   """Abstract test runner that supports compilation on host."""
 
-  def  __init__(self, use_dx):
+  def  __init__(self, dexer):
     """Constructor for the runner with host compilation.
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
     """
     self._jack_args = ['-cp', GetJackClassPath(), '--output-dex', '.',
                        'Test.java']
-    self._use_dx = use_dx
+    self._dexer = dexer
 
   def CompileOnHost(self):
-    if self._use_dx:
+    if self._dexer == 'dx' or self._dexer == 'd8':
       if RunCommand(['javac', 'Test.java'],
                     out=None, err=None, timeout=30) == RetCode.SUCCESS:
-        retc = RunCommand(['dx', '--dex', '--output=classes.dex'] + glob('*.class'),
+        dx = 'dx' if self._dexer == 'dx' else 'd8-compat-dx'
+        retc = RunCommand([dx, '--dex', '--output=classes.dex'] + glob('*.class'),
                           out=None, err='dxerr.txt', timeout=30)
       else:
         retc = RetCode.NOTCOMPILED
-    else:
+    elif self._dexer == 'jack':
       retc = RunCommand(['jack'] + self._jack_args,
                         out=None, err='jackerr.txt', timeout=30)
+    else:
+      raise FatalError('Unknown dexer: ' + self._dexer)
     return retc
 
 
@@ -167,14 +170,14 @@
 class TestRunnerArtOnHost(TestRunnerWithHostCompilation):
   """Abstract test runner of Art on host."""
 
-  def  __init__(self, use_dx, extra_args=None):
+  def  __init__(self, dexer, extra_args=None):
     """Constructor for the Art on host tester.
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
       extra_args: list of strings, extra arguments for dalvikvm
     """
-    super().__init__(use_dx)
+    super().__init__(dexer)
     self._art_cmd = ['/bin/bash', 'art', '-cp', 'classes.dex']
     if extra_args is not None:
       self._art_cmd += extra_args
@@ -191,13 +194,13 @@
 class TestRunnerArtIntOnHost(TestRunnerArtOnHost):
   """Concrete test runner of interpreter mode Art on host."""
 
-  def  __init__(self, use_dx):
+  def  __init__(self, dexer):
     """Constructor for the Art on host tester (interpreter).
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
    """
-    super().__init__(use_dx, ['-Xint'])
+    super().__init__(dexer, ['-Xint'])
 
   @property
   def description(self):
@@ -214,13 +217,13 @@
 class TestRunnerArtOptOnHost(TestRunnerArtOnHost):
   """Concrete test runner of optimizing compiler mode Art on host."""
 
-  def  __init__(self, use_dx):
+  def  __init__(self, dexer):
     """Constructor for the Art on host tester (optimizing).
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
    """
-    super().__init__(use_dx, None)
+    super().__init__(dexer, None)
 
   @property
   def description(self):
@@ -239,15 +242,15 @@
 class TestRunnerArtOnTarget(TestRunnerWithHostCompilation):
   """Abstract test runner of Art on target."""
 
-  def  __init__(self, use_dx, device, extra_args=None):
+  def  __init__(self, dexer, device, extra_args=None):
     """Constructor for the Art on target tester.
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
       device: string, target device serial number (or None)
       extra_args: list of strings, extra arguments for dalvikvm
     """
-    super().__init__(use_dx)
+    super().__init__(dexer)
     self._test_env = DeviceTestEnv('jfuzz_', specific_device=device)
     self._dalvik_cmd = ['dalvikvm']
     if extra_args is not None:
@@ -281,14 +284,14 @@
 class TestRunnerArtIntOnTarget(TestRunnerArtOnTarget):
   """Concrete test runner of interpreter mode Art on target."""
 
-  def  __init__(self, use_dx, device):
+  def  __init__(self, dexer, device):
     """Constructor for the Art on target tester (interpreter).
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
       device: string, target device serial number (or None)
     """
-    super().__init__(use_dx, device, ['-Xint'])
+    super().__init__(dexer, device, ['-Xint'])
 
   @property
   def description(self):
@@ -305,14 +308,14 @@
 class TestRunnerArtOptOnTarget(TestRunnerArtOnTarget):
   """Concrete test runner of optimizing compiler mode Art on target."""
 
-  def  __init__(self, use_dx, device):
+  def  __init__(self, dexer, device):
     """Constructor for the Art on target tester (optimizing).
 
     Args:
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
       device: string, target device serial number (or None)
     """
-    super().__init__(use_dx, device, None)
+    super().__init__(dexer, device, None)
 
   @property
   def description(self):
@@ -342,7 +345,7 @@
   """Tester that runs JFuzz many times and report divergences."""
 
   def  __init__(self, num_tests, device, mode1, mode2, jfuzz_args,
-                report_script, true_divergence_only, use_dx):
+                report_script, true_divergence_only, dexer):
     """Constructor for the tester.
 
     Args:
@@ -353,16 +356,16 @@
       jfuzz_args: list of strings, additional arguments for jfuzz
       report_script: string, path to script called for each divergence
       true_divergence_only: boolean, if True don't bisect timeout divergences
-      use_dx: boolean, if True use dx rather than jack
+      dexer: string, defines dexer
     """
     self._num_tests = num_tests
     self._device = device
-    self._runner1 = GetExecutionModeRunner(use_dx, device, mode1)
-    self._runner2 = GetExecutionModeRunner(use_dx, device, mode2)
+    self._runner1 = GetExecutionModeRunner(dexer, device, mode1)
+    self._runner2 = GetExecutionModeRunner(dexer, device, mode2)
     self._jfuzz_args = jfuzz_args
     self._report_script = report_script
     self._true_divergence_only = true_divergence_only
-    self._use_dx = use_dx
+    self._dexer = dexer
     self._save_dir = None
     self._results_dir = None
     self._jfuzz_dir = None
@@ -405,7 +408,7 @@
     print('Directory :', self._results_dir)
     print('Exec-mode1:', self._runner1.description)
     print('Exec-mode2:', self._runner2.description)
-    print('Compiler  :', 'dx' if self._use_dx else 'jack')
+    print('Dexer     :', self._dexer)
     print()
     self.ShowStats()
     for self._test in range(1, self._num_tests + 1):
@@ -525,8 +528,7 @@
                     for arg in jfuzz_cmd_str.strip().split(' -')][1:]
       wrapped_args = ['--jfuzz_arg={0}'.format(opt) for opt in jfuzz_args]
       repro_cmd_str = (os.path.basename(__file__) +
-                       ' --num_tests=1 ' +
-                       ('--use_dx ' if self._use_dx else '') +
+                       ' --num_tests=1 --dexer=' + self._dexer + ' ' +
                        ' '.join(wrapped_args))
       comment = 'jfuzz {0}\nReproduce test:\n{1}\nReproduce divergence:\n{2}\n'.format(
           jfuzz_ver, jfuzz_cmd_str, repro_cmd_str)
@@ -592,21 +594,22 @@
 def main():
   # Handle arguments.
   parser = argparse.ArgumentParser()
-  parser.add_argument('--num_tests', default=10000,
-                      type=int, help='number of tests to run')
+  parser.add_argument('--num_tests', default=10000, type=int,
+                      help='number of tests to run')
   parser.add_argument('--device', help='target device serial number')
   parser.add_argument('--mode1', default='ri',
                       help='execution mode 1 (default: ri)')
   parser.add_argument('--mode2', default='hopt',
                       help='execution mode 2 (default: hopt)')
-  parser.add_argument('--report_script', help='script called for each'
-                                              ' divergence')
+  parser.add_argument('--report_script',
+                      help='script called for each divergence')
   parser.add_argument('--jfuzz_arg', default=[], dest='jfuzz_args',
-                      action='append', help='argument for jfuzz')
+                      action='append',
+                      help='argument for jfuzz')
   parser.add_argument('--true_divergence', default=False, action='store_true',
-                      help='don\'t bisect timeout divergences')
-  parser.add_argument('--use_dx', default=False, action='store_true',
-                      help='use dx (rather than jack)')
+                      help='do not bisect timeout divergences')
+  parser.add_argument('--dexer', default='dx', type=str,
+                      help='defines dexer as dx, d8, or jack (default: dx)')
   args = parser.parse_args()
   if args.mode1 == args.mode2:
     raise FatalError('Identical execution modes given')
@@ -614,7 +617,7 @@
   with JFuzzTester(args.num_tests,
                    args.device, args.mode1, args.mode2,
                    args.jfuzz_args, args.report_script,
-                   args.true_divergence, args.use_dx) as fuzzer:
+                   args.true_divergence, args.dexer) as fuzzer:
     fuzzer.Run()
 
 if __name__ == '__main__':
diff --git a/tools/libjdwp_art_failures.txt b/tools/prebuilt_libjdwp_art_failures.txt
similarity index 93%
rename from tools/libjdwp_art_failures.txt
rename to tools/prebuilt_libjdwp_art_failures.txt
index abcc728..7694a4c 100644
--- a/tools/libjdwp_art_failures.txt
+++ b/tools/prebuilt_libjdwp_art_failures.txt
@@ -1,6 +1,9 @@
 /*
  * This file contains expectations for ART's buildbot. The purpose of this file is
  * to temporarily list failing tests and not break the bots.
+ *
+ * This file contains the expectations for the 'prebuilt-libjdwp-aot' and
+ * 'prebuilt-libjdwp-jit' test groups on the chromium buildbot.
  */
 [
 {
@@ -64,6 +67,12 @@
            "org.apache.harmony.jpda.tests.jdwp.EventModifiers.InstanceOnlyModifierTest#testMethodExit",
            "org.apache.harmony.jpda.tests.jdwp.EventModifiers.InstanceOnlyModifierTest#testMethodExitWithReturnValue" ]
 },
+{
+  description: "Tests for VMDebug functionality not implemented in the upstream libjdwp",
+  result: EXEC_FAILED,
+  names: [ "org.apache.harmony.jpda.tests.jdwp.VMDebug.VMDebugTest#testVMDebug",
+           "org.apache.harmony.jpda.tests.jdwp.VMDebug.VMDebugTest002#testVMDebug" ]
+},
 /* TODO Categorize these failures more. */
 {
   description: "Tests that fail on both ART and RI. These tests are likely incorrect",
diff --git a/tools/run-libjdwp-tests.sh b/tools/run-libjdwp-tests.sh
index 47e7c45..e116fac 100755
--- a/tools/run-libjdwp-tests.sh
+++ b/tools/run-libjdwp-tests.sh
@@ -79,7 +79,7 @@
   args+=(-Xplugin:libopenjdkjvmti.so)
 fi
 
-expect_path=$PWD/art/tools/libjdwp_oj_art_failures.txt
+expect_path=$PWD/art/tools/external_oj_libjdwp_art_failures.txt
 function verbose_run() {
   echo "$@"
   env "$@"
diff --git a/tools/run-prebuilt-libjdwp-tests.sh b/tools/run-prebuilt-libjdwp-tests.sh
index 46c2a15..e7f028a 100755
--- a/tools/run-prebuilt-libjdwp-tests.sh
+++ b/tools/run-prebuilt-libjdwp-tests.sh
@@ -96,7 +96,7 @@
 fi
 
 props_path=$PWD/art/tools/libjdwp-compat.props
-expect_path=$PWD/art/tools/libjdwp_art_failures.txt
+expect_path=$PWD/art/tools/prebuilt_libjdwp_art_failures.txt
 
 function verbose_run() {
   echo "$@"