Merge "Save profile information in a separate thread."
diff --git a/Android.mk b/Android.mk
index 97a82e2..9cf2a30 100644
--- a/Android.mk
+++ b/Android.mk
@@ -405,10 +405,10 @@
 # Rules for building all dependencies for tests.
 
 .PHONY: build-art-host-tests
-build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES)
+build-art-host-tests:   build-art-host $(TEST_ART_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(ART_TEST_HOST_GTEST_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 
 .PHONY: build-art-target-tests
-build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS)
+build-art-target-tests:   build-art-target $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TEST_ART_TARGET_SYNC_DEPS) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
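+# Note: prerequisites after '|' are order-only in GNU Make: they must be built before the
+# target, but changes to them do not make the target out of date.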
 
 ########################################################################
 # targets to switch back and forth from libdvm to libart
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 43e1457..eec471e 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -359,12 +359,6 @@
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags)
 ART_HOST_ASFLAGS += $(art_asflags)
 
-# The latest clang update trips over many of the files in art and never finishes
-# compiling for aarch64 with -O3 (or -O2). Drop back to -O1 while we investigate
-# to stop punishing the build server.
-# Bug: http://b/23256622
-ART_TARGET_CLANG_CFLAGS_arm64 += -O1
-
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
   $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
 endif
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 291a69d..1c23929 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -564,6 +564,9 @@
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
     LOCAL_CLANG_CFLAGS += -Wno-used-but-marked-unused -Wno-deprecated -Wno-missing-noreturn  # gtest issue
+    # clang fails to compile art/runtime/arch/stub_test.cc for arm64 without -O1
+    # b/26275713
+    LOCAL_CLANG_CFLAGS_arm64 += -O1
     include $$(BUILD_EXECUTABLE)
     library_path :=
     2nd_library_path :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 592843e..50600ef 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -112,7 +112,7 @@
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
 $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency)
-	@echo "host dex2oat: $$@ ($$?)"
+	@echo "host dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
@@ -238,7 +238,7 @@
 $$(core_image_name): PRIVATE_CORE_IMG_NAME := $$(core_image_name)
 $$(core_image_name): PRIVATE_CORE_OAT_NAME := $$(core_oat_name)
 $$(core_image_name): $$(TARGET_CORE_DEX_FILES) $$(core_dex2oat_dependency)
-	@echo "target dex2oat: $$@ ($$?)"
+	@echo "target dex2oat: $$@"
 	@mkdir -p $$(dir $$@)
 	$$(hide) $(4) $$(DEX2OAT)$(5) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) \
 	  --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 638b897..c7c1907 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -142,10 +142,7 @@
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
   size_t pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    MakeExecutable(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  for (auto& m : klass->GetMethods(pointer_size)) {
     MakeExecutable(&m);
   }
 }
@@ -259,10 +256,7 @@
   mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader);
   CHECK(klass != nullptr) << "Class not found " << class_name;
   auto pointer_size = class_linker_->GetImagePointerSize();
-  for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    CompileMethod(&m);
-  }
-  for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+  for (auto& m : klass->GetMethods(pointer_size)) {
     CompileMethod(&m);
   }
 }
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 15a4ba0..7a93613 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -23,7 +23,7 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
-#include "length_prefixed_array.h"
+#include "base/length_prefixed_array.h"
 #include "method_reference.h"
 #include "utils/array_ref.h"
 
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841..1c2a619 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@
   { kX86Lfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
   { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
   { kX86Sfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
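+  // The entry below encodes LOCK (0xF0) followed by ADD r/m32, imm8 (opcode 0x83, /0),
+  // i.e. 'lock add dword ptr [mem], imm8', a locked RMW usable as a full memory barrier.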
+  { kX86LockAdd32MI8, kMemImm,        IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef..4ff7993 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <string>
 
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
 #include "backend_x86.h"
 #include "base/logging.h"
@@ -585,6 +585,8 @@
       case kX86LockCmpxchgAR:
       case kX86LockCmpxchg64M:
       case kX86LockCmpxchg64A:
+      case kX86LockCmpxchg64AR:
+      case kX86LockAdd32MI8:
       case kX86XchgMR:
       case kX86Mfence:
         // Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+  const X86InstructionSetFeatures* features =
+    cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+  if (!features->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
+  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+  bool use_locked_add = features->PrefersLockedAddSynchronization();
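+  // Illustrative note: a locked RMW on the stack such as 'lock addl $0, (%esp)' orders memory
+  // like mfence but is cheaper on many x86 micro-architectures, which is what
+  // PrefersLockedAddSynchronization() reports.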
   if (barrier_kind == kAnyAny) {
-    // If no LIR exists already that can be used a barrier, then generate an mfence.
+    // If no LIR exists already that can be used as a barrier, then generate a barrier.
     if (mem_barrier == nullptr) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
 
-    // If last instruction does not provide full barrier, then insert an mfence.
+    // If the last instruction does not provide a full barrier, then insert a barrier.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
   } else if (barrier_kind == kNTStoreStore) {
-      mem_barrier = NewLIR0(kX86Sfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Sfence);
+      }
       ret = true;
   }
 
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60..8cd6574 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@
                                 // load-from-memory and store-to-memory instructions
   kX86Sfence,                   // memory barrier to serialize all previous
                                 // store-to-memory instructions
+  kX86LockAdd32MI8,             // locked add used to serialize memory instructions
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
   Binary0fOpCode(kX86Imul64),   // 64bit multiply
diff --git a/compiler/dex/type_inference_test.cc b/compiler/dex/type_inference_test.cc
index 528a18c..e2c0d32 100644
--- a/compiler/dex/type_inference_test.cc
+++ b/compiler/dex/type_inference_test.cc
@@ -253,7 +253,7 @@
         &cu_, cu_.class_loader, cu_.class_linker, *cu_.dex_file, nullptr /* code_item not used */,
         0u /* class_def_idx not used */, 0u /* method_index not used */,
         cu_.access_flags, nullptr /* verified_method not used */,
-        NullHandle<mirror::DexCache>()));
+        ScopedNullHandle<mirror::DexCache>()));
     cu_.mir_graph->current_method_ = 0u;
     code_item_ = static_cast<DexFile::CodeItem*>(
         cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index ef10b67..d6961a0 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -20,8 +20,8 @@
 #include <iosfwd>
 #include <memory>
 
+#include "base/length_prefixed_array.h"
 #include "base/macros.h"
-#include "length_prefixed_array.h"
 #include "utils/array_ref.h"
 #include "utils/dedupe_set.h"
 #include "utils/swap_space.h"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ba8f1d0..5630b08 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -790,10 +790,7 @@
 
   virtual bool Visit(mirror::Class* c) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
-      ResolveExceptionsForMethod(&m, pointer_size);
-    }
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
     }
     return true;
@@ -901,8 +898,10 @@
           *dex_file,
           Runtime::Current()->GetLinearAlloc())));
       Handle<mirror::Class> klass(hs2.NewHandle(
-          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
-                                    NullHandle<mirror::ClassLoader>())));
+          class_linker->ResolveType(*dex_file,
+                                    exception_type_idx,
+                                    dex_cache,
+                                    ScopedNullHandle<mirror::ClassLoader>())));
       if (klass.Get() == nullptr) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index f8de9fa..b6abc6e 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -86,10 +86,7 @@
       mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader);
       CHECK(c != nullptr);
       const auto pointer_size = class_linker->GetImagePointerSize();
-      for (auto& m : c->GetDirectMethods(pointer_size)) {
-        MakeExecutable(&m);
-      }
-      for (auto& m : c->GetVirtualMethods(pointer_size)) {
+      for (auto& m : c->GetMethods(pointer_size)) {
         MakeExecutable(&m);
       }
     }
@@ -152,9 +149,14 @@
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
+    CompileVirtualMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                         "java.lang.Class",
+                         "isFinalizable",
                          "()Z");
-    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
+    CompileDirectMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                        "java.lang.Object",
+                        "<init>",
+                        "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 4d2d924..8c38cf2 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -34,6 +34,7 @@
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
+      native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(kDefaultGenerateDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
@@ -81,6 +82,7 @@
     include_patch_information_(include_patch_information),
     top_k_profile_threshold_(top_k_profile_threshold),
     debuggable_(debuggable),
+    native_debuggable_(kDefaultNativeDebuggable),
     generate_debug_info_(generate_debug_info),
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
@@ -207,6 +209,10 @@
   } else if (option == "--debuggable") {
     debuggable_ = true;
     generate_debug_info_ = true;
+  } else if (option == "--native-debuggable") {
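+    // --native-debuggable implies --debuggable and debug info generation.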
+    native_debuggable_ = true;
+    debuggable_ = true;
+    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab4..2b047a2 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -49,6 +49,7 @@
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
+  static const bool kDefaultNativeDebuggable = false;
   static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
@@ -162,6 +163,10 @@
     return debuggable_;
   }
 
+  bool GetNativeDebuggable() const {
+    return native_debuggable_;
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
@@ -240,6 +245,7 @@
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
+  bool native_debuggable_;
   bool generate_debug_info_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 06553a6..2bc8c89 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -36,6 +36,15 @@
 namespace art {
 namespace dwarf {
 
+// The ARM specification defines three special mapping symbols
+// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
+// These symbols can be used by tools, for example, to pretty
+// print instructions correctly.  Objdump will use them if they
+// exist, but it will still work well without them.
+// However, these extra symbols take space, so let's just generate
+// one symbol which marks the whole .text section as code.
+constexpr bool kGenerateSingleArmMappingSymbol = true;
+
 static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
   switch (isa) {
     case kArm:
@@ -207,8 +216,7 @@
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
                      const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
-  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
-        format == dwarf::DW_EH_FRAME_FORMAT);
+  CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
   std::vector<uint32_t> binary_search_table;
@@ -220,7 +228,7 @@
   }
 
   // Write .eh_frame/.debug_frame section.
-  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+  auto* cfi_section = (format == DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
                        : builder->GetEhFrame());
   {
@@ -1134,21 +1142,87 @@
   }
 }
 
+template <typename ElfTypes>
+void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                       const ArrayRef<const MethodDebugInfo>& method_infos) {
+  bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
+
+  // Find all addresses (low_pc) which contain deduped methods.
+  // The first instance of a method is not marked deduped_, but the rest are.
+  std::unordered_set<uint32_t> deduped_addresses;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
+    }
+  }
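+  // For example, if methods A and B were deduplicated to the same code and B is marked
+  // deduped_, a single symbol is emitted for that address under the name "A [DEDUPED]".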
+
+  strtab->Start();
+  strtab->Write("");  // strtab should start with an empty string.
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      continue;  // Add symbol only for the first instance.
+    }
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
+      name += " [DEDUPED]";
+    }
+
+    uint32_t low_pc = info.low_pc_;
+    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
+    low_pc += info.compiled_method_->CodeDelta();
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Conforming to AAELF, add a $t mapping symbol to indicate the start of a sequence of
+    // Thumb2 instructions, so that disassembler tools can disassemble them correctly.
+    // Note that even if we generate just a single mapping symbol, ARM's Streamline
+    // requires it to match a function symbol.  Address 0 alone does not work.
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
+      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
+        generated_mapping_symbol = true;
+      }
+    }
+  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
+}
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format) {
+  if (!method_infos.empty()) {
+    // Add methods to .symtab.
+    WriteDebugSymbols(builder, method_infos);
+    // Generate CFI (stack unwinding information).
+    WriteCFISection(builder, method_infos, cfi_format);
+    // Write DWARF .debug_* sections.
+    WriteDebugSections(builder, method_infos);
+  }
+}
+
 // Explicit instantiations
-template void WriteCFISection<ElfTypes32>(
+template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteCFISection<ElfTypes64>(
+    CFIFormat cfi_format);
+template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteDebugSections<ElfTypes32>(
-    ElfBuilder<ElfTypes32>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
-template void WriteDebugSections<ElfTypes64>(
-    ElfBuilder<ElfTypes64>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
+    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 9ed102f..7ec0be1 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -25,14 +25,10 @@
 namespace art {
 namespace dwarf {
 
-template<typename ElfTypes>
-void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                     CFIFormat format);
-
-template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos);
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index e411496..7b1bdd7 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -46,15 +46,6 @@
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
-// The ARM specification defines three special mapping symbols
-// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
-// These symbols can be used by tools, for example, to pretty
-// print instructions correctly.  Objdump will use them if they
-// exist, but it will still work well without them.
-// However, these extra symbols take space, so let's just generate
-// one symbol which marks the whole .text section as code.
-constexpr bool kGenerateSingleArmMappingSymbol = true;
-
 template <typename ElfTypes>
 class ElfWriterQuick FINAL : public ElfWriter {
  public:
@@ -99,10 +90,6 @@
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
-
-template <typename ElfTypes>
 ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
                                          const CompilerOptions* compiler_options,
                                          File* elf_file)
@@ -165,14 +152,7 @@
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder_.get(), method_infos);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder_.get(), method_infos);
-    }
+    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
   }
 }
 
@@ -199,64 +179,6 @@
   return builder_->GetStream();
 }
 
-template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
-  bool generated_mapping_symbol = false;
-  auto* strtab = builder->GetStrTab();
-  auto* symtab = builder->GetSymTab();
-
-  if (method_infos.empty()) {
-    return;
-  }
-
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      deduped_addresses.insert(info.low_pc_);
-    }
-  }
-
-  strtab->Start();
-  strtab->Write("");  // strtab should start with empty string.
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      continue;  // Add symbol only for the first instance.
-    }
-    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
-    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
-    }
-
-    uint32_t low_pc = info.low_pc_;
-    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
-
-    // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
-    // instructions, so that disassembler tools can correctly disassemble.
-    // Note that even if we generate just a single mapping symbol, ARM's Streamline
-    // requires it to match function symbol.  Just address 0 does not work.
-    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
-      }
-    }
-  }
-  strtab->End();
-
-  // Symbols are buffered and written after names (because they are smaller).
-  // We could also do two passes in this function to avoid the buffering.
-  symtab->Start();
-  symtab->Write();
-  symtab->End();
-}
-
 // Explicit instantiations
 template class ElfWriterQuick<ElfTypes32>;
 template class ElfWriterQuick<ElfTypes64>;
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index afa4904..9545c83 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1030,44 +1030,42 @@
         }
       }
       // Visit and assign offsets for methods.
-      LengthPrefixedArray<ArtMethod>* method_arrays[] = {
-          as_klass->GetDirectMethodsPtr(), as_klass->GetVirtualMethodsPtr(),
-      };
-      for (LengthPrefixedArray<ArtMethod>* array : method_arrays) {
-        if (array == nullptr) {
-          continue;
-        }
+      size_t num_methods = as_klass->NumMethods();
+      if (num_methods != 0) {
         bool any_dirty = false;
-        size_t count = 0;
-        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
-        const size_t method_size = ArtMethod::Size(target_ptr_size_);
-        auto iteration_range =
-            MakeIterationRangeFromLengthPrefixedArray(array, method_size, method_alignment);
-        for (auto& m : iteration_range) {
-          any_dirty = any_dirty || WillMethodBeDirty(&m);
-          ++count;
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
+          if (WillMethodBeDirty(&m)) {
+            any_dirty = true;
+            break;
+          }
         }
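+        // A single dirty method is enough to place the whole method array in the dirty bin,
+        // so the scan above stops at the first one.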
         NativeObjectRelocationType type = any_dirty
             ? kNativeObjectRelocationTypeArtMethodDirty
             : kNativeObjectRelocationTypeArtMethodClean;
         Bin bin_type = BinTypeForNativeRelocationType(type);
         // Forward the entire array at once, but header first.
+        const size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
+        const size_t method_size = ArtMethod::Size(target_ptr_size_);
         const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0,
                                                                                method_size,
                                                                                method_alignment);
+        LengthPrefixedArray<ArtMethod>* array = as_klass->GetMethodsPtr();
         auto it = native_object_relocations_.find(array);
-        CHECK(it == native_object_relocations_.end()) << "Method array " << array
-            << " already forwarded";
+        CHECK(it == native_object_relocations_.end())
+            << "Method array " << array << " already forwarded";
         size_t& offset = bin_slot_sizes_[bin_type];
         DCHECK(!IsInBootImage(array));
-        native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
-            any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
-                kNativeObjectRelocationTypeArtMethodArrayClean });
+        native_object_relocations_.emplace(
+            array, NativeObjectRelocation {
+              offset,
+              any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty
+                        : kNativeObjectRelocationTypeArtMethodArrayClean
+            });
         offset += header_size;
-        for (auto& m : iteration_range) {
+        for (auto& m : as_klass->GetMethods(target_ptr_size_)) {
           AssignMethodOffset(&m, type);
         }
-        (any_dirty ? dirty_methods_ : clean_methods_) += count;
+        (any_dirty ? dirty_methods_ : clean_methods_) += num_methods;
       }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 8e930f0..f1b2965 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -27,11 +27,11 @@
 #include <ostream>
 
 #include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
 #include "gc/space/space.h"
 #include "image.h"
-#include "length_prefixed_array.h"
 #include "lock_word.h"
 #include "mem_map.h"
 #include "oat_file.h"
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 5ab55e0..8d60be2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -220,7 +220,8 @@
 
   std::string reason;
   ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
-                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
+                  LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false,
+                                    nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
@@ -235,7 +236,8 @@
 
   std::string reason;
   ASSERT_TRUE(Runtime::Current()->GetJavaVM()->
-                  LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason))
+                  LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false,
+                                    nullptr, nullptr, &reason))
       << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index eea5204..7b7d46c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -224,8 +224,9 @@
     }
 
     const char* descriptor = dex_file.GetClassDescriptor(class_def);
-    mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor,
-                                                   NullHandle<mirror::ClassLoader>());
+    mirror::Class* klass = class_linker->FindClass(soa.Self(),
+                                                   descriptor,
+                                                   ScopedNullHandle<mirror::ClassLoader>());
 
     const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i);
     CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor;
@@ -238,12 +239,12 @@
       ++method_index;
     }
     size_t visited_virtuals = 0;
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
-      if (!m.IsMiranda()) {
-        CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
-        ++method_index;
-        ++visited_virtuals;
-      }
+    // TODO: We should also check copied methods in this test.
+    for (auto& m : klass->GetDeclaredVirtualMethods(pointer_size)) {
+      EXPECT_FALSE(m.IsMiranda());
+      CheckMethod(&m, oat_class.GetOatMethod(method_index), dex_file);
+      ++method_index;
+      ++visited_virtuals;
     }
     EXPECT_EQ(visited_virtuals, num_virtual_methods);
   }
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 2b2f0e8..53ac77b 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -737,7 +737,7 @@
         *dex_file_,
         it.GetMemberIndex(),
         dex_cache,
-        NullHandle<mirror::ClassLoader>(),
+        ScopedNullHandle<mirror::ClassLoader>(),
         nullptr,
         invoke_type);
     if (method == nullptr) {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 1178d0f..4dd0d26 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -17,6 +17,8 @@
 #include "builder.h"
 
 #include "art_field-inl.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
 #include "dex/verified_method.h"
@@ -458,6 +460,19 @@
     return false;
   }
 
+  // Find locations where we want to generate extra stackmaps for native debugging.
+  // This allows us to generate the info only at interesting points (for example,
+  // at the start of a Java statement) rather than before every dex instruction.
+  const bool native_debuggable = compiler_driver_ != nullptr &&
+                                 compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+  ArenaBitVector* native_debug_info_locations;
+  if (native_debuggable) {
+    const uint32_t num_instructions = code_item.insns_size_in_code_units_;
+    native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false);
+    native_debug_info_locations->ClearAllBits();
+    FindNativeDebugInfoLocations(code_item, native_debug_info_locations);
+  }
+
   CreateBlocksForTryCatch(code_item);
 
   InitializeParameters(code_item.ins_size_);
@@ -467,6 +482,11 @@
     // Update the current block if dex_pc starts a new block.
     MaybeUpdateCurrentBlock(dex_pc);
     const Instruction& instruction = *Instruction::At(code_ptr);
+    if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) {
+      if (current_block_ != nullptr) {
+        current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc));
+      }
+    }
     if (!AnalyzeDexInstruction(instruction, dex_pc)) {
       return false;
     }
@@ -507,6 +527,47 @@
   current_block_ = block;
 }
 
+void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item,
+                                                 ArenaBitVector* locations) {
+  // The callback gets called when the line number changes.
+  // In other words, it marks the start of a new Java statement.
+  struct Callback {
+    static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
+      static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
+      return false;
+    }
+  };
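+  // Returning false from Position() keeps DecodeDebugPositionInfo iterating, so a bit is set
+  // for every line-number change in the method's debug info.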
+  dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations);
+  // Add native debug info at the start of every basic block.
+  for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) {
+    if (FindBlockStartingAt(pc) != nullptr) {
+      locations->SetBit(pc);
+    }
+  }
+  // Instruction-specific tweaks.
+  const Instruction* const begin = Instruction::At(code_item.insns_);
+  const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_);
+  for (const Instruction* inst = begin; inst < end; inst = inst->Next()) {
+    switch (inst->Opcode()) {
+      case Instruction::MOVE_EXCEPTION:
+      case Instruction::MOVE_RESULT:
+      case Instruction::MOVE_RESULT_WIDE:
+      case Instruction::MOVE_RESULT_OBJECT: {
+        // The compiler checks that there are no instructions before these, so generate the
+        // HNativeDebugInfo after them instead.
+        locations->ClearBit(inst->GetDexPc(code_item.insns_));
+        const Instruction* next = inst->Next();
+        if (next < end) {
+          locations->SetBit(next->GetDexPc(code_item.insns_));
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+}
+
 bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
                                          const uint16_t* code_end,
                                          size_t* number_of_branches) {
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c3979f3..26bf1cb 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -80,7 +80,8 @@
         can_use_baseline_for_string_init_(true),
         compilation_stats_(nullptr),
         interpreter_metadata_(nullptr),
-        dex_cache_(NullHandle<mirror::DexCache>()) {}
+        null_dex_cache_(),
+        dex_cache_(null_dex_cache_) {}
 
   bool BuildGraph(const DexFile::CodeItem& code);
 
@@ -90,8 +91,9 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
+  // The minimum number of entries in a packed switch before we use a jump table or a
+  // series of compares/jumps.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
@@ -110,6 +112,7 @@
                             const uint16_t* end,
                             size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t dex_pc);
+  void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations);
   HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const;
   HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc);
 
@@ -370,6 +373,7 @@
   const uint8_t* interpreter_metadata_;
 
   // Dex cache for dex_file_.
+  ScopedNullHandle<mirror::DexCache> null_dex_cache_;
   Handle<mirror::DexCache> dex_cache_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3630dbe..58feb67 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -1618,7 +1618,15 @@
                         /* false_target */ nullptr);
 }
 
-void LocationsBuilderARM::VisitCondition(HCondition* cond) {
+void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
+void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1649,7 +1657,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1706,83 +1714,83 @@
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderARM::VisitLocal(HLocal* local) {
@@ -6250,7 +6258,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6266,12 +6274,30 @@
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
+    // store the immediate, because IP is used as the destination register. For the other
+    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+    // and they can be encoded in the instruction without making use of the IP register.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      GenerateCompareWithImmediate(value_reg, lower_bound + i);
-      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
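+    // Worked example: with four entries {k, ..., k+3}, the SUBS above dispatches k (EQ), one
+    // loop iteration dispatches k+1 (LO) and k+2 (EQ), and the final compare dispatches k+3.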
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      GenerateCompareWithImmediate(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8193c28..b7c58e1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
@@ -216,6 +217,7 @@
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 451470f..b49f42b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
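+// For example, at num_entries == 8 the series costs about 1.5 * 8 + 3 = 15 instructions,
+// while the jump table costs 7 instructions plus 8 literals, so the threshold of 7 sits at
+// roughly the break-even point.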
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -546,7 +546,7 @@
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -2427,7 +2427,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
 
   if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
@@ -2447,7 +2447,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -2495,8 +2495,8 @@
   M(Above)                                                                               \
   M(AboveOrEqual)
 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
-void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
-void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
+void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
+void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
 #undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
@@ -2949,6 +2949,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   HandleFieldGet(instruction);
 }
@@ -4582,20 +4590,29 @@
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries < kPackedSwitchJumpTableThreshold ||
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      int32_t case_value = lower_bound + i;
-      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-      if (case_value == 0) {
-        __ Cbz(value_reg, succ);
-      } else {
-        __ Cmp(value_reg, Operand(case_value));
-        __ B(eq, succ);
-      }
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7950f07..0e90ac6 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -215,6 +215,7 @@
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -257,6 +258,7 @@
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction);
   void HandleInvoke(HInvoke* instr);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5dc101b..07efdee 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2092,7 +2092,7 @@
   }
 }
 
-void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->InputAt(0)->GetType()) {
     default:
@@ -2112,7 +2112,7 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -3244,6 +3244,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
@@ -4792,83 +4800,83 @@
 }
 
 void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
@@ -4897,19 +4905,31 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to the default block if the index is negative.
+  // Note: We don't separately handle the case where the index is positive even though
+  // value < lower_bound (the subtraction wrapped), because then index >= num_entries must be
+  // true and the value falls through to the default block anyway; this saves one branch.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
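+  // Invariant: temp_reg holds value - case_value[last_index] and is known positive here,
+  // so after subtracting 2 a negative result means value == case_value[last_index + 1]
+  // and a zero result means value == case_value[last_index + 2].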
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // Handle the last remaining case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
-  // Insert the default branch for every other value.
+  // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 1ee6bde..38302ad 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -185,6 +185,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -220,6 +221,7 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 99f58dd..05834ff 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1752,11 +1752,7 @@
 void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type)
-      ? LocationSummary::kCall
-      : LocationSummary::kNoCall;
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
 
   switch (in_type) {
     case Primitive::kPrimLong:
@@ -1766,13 +1762,11 @@
       break;
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      InvokeRuntimeCallingConvention calling_convention;
-      locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
-      locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
-      locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+    case Primitive::kPrimDouble:
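+      // FP compares are generated inline (see VisitCompare below), so plain FPU
+      // registers suffice and no runtime-call calling convention is needed here.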
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
@@ -1781,14 +1775,15 @@
 
 void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  GpuRegister res = locations->Out().AsRegister<GpuRegister>();
   Primitive::Type in_type = instruction->InputAt(0)->GetType();
+  bool gt_bias = instruction->IsGtBias();
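+  // With gt bias an unordered compare (NaN input) must yield 1; with lt bias it must
+  // yield -1. The sequences below fall through to that constant when neither the
+  // equality nor the ordered less-than compare sets FTMP.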
 
   //  0 if: left == right
   //  1 if: left  > right
   // -1 if: left  < right
   switch (in_type) {
     case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
       Location rhs_location = locations->InAt(1);
       bool use_imm = rhs_location.IsConstant();
@@ -1803,35 +1798,52 @@
         rhs = rhs_location.AsRegister<GpuRegister>();
       }
       __ Slt(TMP, lhs, rhs);
-      __ Slt(dst, rhs, lhs);
-      __ Subu(dst, dst, TMP);
+      __ Slt(res, rhs, lhs);
+      __ Subu(res, res, TMP);
       break;
     }
 
-    case Primitive::kPrimFloat:
+    case Primitive::kPrimFloat: {
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
+      __ CmpEqS(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (gt_bias) {
+        __ CmpLtS(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
+      } else {
+        __ CmpLtS(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
+      }
+      __ Bind(&done);
+      break;
+    }
+
     case Primitive::kPrimDouble: {
-      int32_t entry_point_offset;
-      if (in_type == Primitive::kPrimFloat) {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat)
-                                                     : QUICK_ENTRY_POINT(pCmplFloat);
+      FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+      FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
+      Mips64Label done;
+      __ CmpEqD(FTMP, lhs, rhs);
+      __ LoadConst32(res, 0);
+      __ Bc1nez(FTMP, &done);
+      if (gt_bias) {
+        __ CmpLtD(FTMP, lhs, rhs);
+        __ LoadConst32(res, -1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, 1);
       } else {
-        entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgDouble)
-                                                     : QUICK_ENTRY_POINT(pCmplDouble);
+        __ CmpLtD(FTMP, rhs, lhs);
+        __ LoadConst32(res, 1);
+        __ Bc1nez(FTMP, &done);
+        __ LoadConst32(res, -1);
       }
-      codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
-      if (in_type == Primitive::kPrimFloat) {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
-        }
-      } else {
-        if (instruction->IsGtBias()) {
-          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
-        } else {
-          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
-        }
-      }
+      __ Bind(&done);
       break;
     }
 
@@ -1840,143 +1852,67 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  switch (instruction->InputAt(0)->GetType()) {
+    default:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      break;
+  }
   if (instruction->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
-void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
 
-  // TODO: generalize to long
-  DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong);
-
+  Primitive::Type type = instruction->InputAt(0)->GetType();
   LocationSummary* locations = instruction->GetLocations();
-
   GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-  Location rhs_location = locations->InAt(1);
+  Mips64Label true_label;
 
-  GpuRegister rhs_reg = ZERO;
-  int64_t rhs_imm = 0;
-  bool use_imm = rhs_location.IsConstant();
-  if (use_imm) {
-    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-  } else {
-    rhs_reg = rhs_location.AsRegister<GpuRegister>();
-  }
+  switch (type) {
+    default:
+      // Integer case.
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations);
+      return;
+    case Primitive::kPrimLong:
+      GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations);
+      return;
 
-  IfCondition if_cond = instruction->GetCondition();
-
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
-      } else {
-        __ Sltu(dst, ZERO, dst);
-      }
-      break;
-
-    case kCondLT:
-    case kCondGE:
-      if (use_imm && IsInt<16>(rhs_imm)) {
-        __ Slti(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondGE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the slt instruction but no sge.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondLE:
-    case kCondGT:
-      if (use_imm && IsInt<16>(rhs_imm + 1)) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Slti(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondGT) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the slti instruction but no sgti.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Slt(dst, rhs_reg, lhs);
-        if (if_cond == kCondLE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the slt instruction but no sle.
-          __ Xori(dst, dst, 1);
-        }
-      }
-      break;
-
-    case kCondB:
-    case kCondAE:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) {
-        __ Sltiu(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, lhs, rhs_reg);
-      }
-      if (if_cond == kCondAE) {
-        // Simulate lhs >= rhs via !(lhs < rhs) since there's
-        // only the sltu instruction but no sgeu.
-        __ Xori(dst, dst, 1);
-      }
-      break;
-
-    case kCondBE:
-    case kCondA:
-      if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) {
-        // Simulate lhs <= rhs via lhs < rhs + 1.
-        __ Sltiu(dst, lhs, rhs_imm + 1);
-        if (if_cond == kCondA) {
-          // Simulate lhs > rhs via !(lhs <= rhs) since there's
-          // only the sltiu instruction but no sgtiu.
-          __ Xori(dst, dst, 1);
-        }
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
-        }
-        __ Sltu(dst, rhs_reg, lhs);
-        if (if_cond == kCondBE) {
-          // Simulate lhs <= rhs via !(rhs < lhs) since there's
-          // only the sltu instruction but no sleu.
-          __ Xori(dst, dst, 1);
-        }
-      }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO: don't use branches.
+      GenerateFpCompareAndBranch(instruction->GetCondition(),
+                                 instruction->IsGtBias(),
+                                 type,
+                                 locations,
+                                 &true_label);
       break;
   }
+
+  // Convert the branches into the result.
+  Mips64Label done;
+
+  // False case: result = 0.
+  __ LoadConst32(dst, 0);
+  __ Bc(&done);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ LoadConst32(dst, 1);
+  __ Bind(&done);
 }
 
 void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
@@ -2375,6 +2311,329 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond,
+                                                            bool is64bit,
+                                                            LocationSummary* locations) {
+  GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    }
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+  int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1);
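+  // Using an unsigned 1 keeps the addition well-defined when rhs_imm is INT64_MAX
+  // (signed overflow would be undefined behavior); the wrapped result is then
+  // rejected by the IsInt<16> and non-zero checks on rhs_imm_plus_one below.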
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondEQ) {
+        __ Sltiu(dst, dst, 1);
+      } else {
+        __ Sltu(dst, ZERO, dst);
+      }
+      break;
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondGE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the slt instruction but no sge.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondGT) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the slti instruction but no sgti.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        if (cond == kCondLE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the slt instruction but no sle.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      if (cond == kCondAE) {
+        // Simulate lhs >= rhs via !(lhs < rhs) since there's
+        // only the sltu instruction but no sgeu.
+        __ Xori(dst, dst, 1);
+      }
+      break;
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+        __ Sltiu(dst, lhs, rhs_imm_plus_one);
+        if (cond == kCondA) {
+          // Simulate lhs > rhs via !(lhs <= rhs) since there's
+          // only the sltiu instruction but no sgtiu.
+          __ Xori(dst, dst, 1);
+        }
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst64(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        if (cond == kCondBE) {
+          // Simulate lhs <= rhs via !(rhs < lhs) since there's
+          // only the sltu instruction but no sleu.
+          __ Xori(dst, dst, 1);
+        }
+      }
+      break;
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond,
+                                                                     bool is64bit,
+                                                                     LocationSummary* locations,
+                                                                     Mips64Label* label) {
+  GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+  Location rhs_location = locations->InAt(1);
+  GpuRegister rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    if (is64bit) {
+      rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+    } else {
+      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+    }
+  } else {
+    rhs_reg = rhs_location.AsRegister<GpuRegister>();
+  }
+
+  if (use_imm && rhs_imm == 0) {
+    switch (cond) {
+      case kCondEQ:
+      case kCondBE:  // <= 0 if zero
+        __ Beqzc(lhs, label);
+        break;
+      case kCondNE:
+      case kCondA:  // > 0 if non-zero
+        __ Bnezc(lhs, label);
+        break;
+      case kCondLT:
+        __ Bltzc(lhs, label);
+        break;
+      case kCondGE:
+        __ Bgezc(lhs, label);
+        break;
+      case kCondLE:
+        __ Blezc(lhs, label);
+        break;
+      case kCondGT:
+        __ Bgtzc(lhs, label);
+        break;
+      case kCondB:  // always false
+        break;
+      case kCondAE:  // always true
+        __ Bc(label);
+        break;
+    }
+  } else {
+    if (use_imm) {
+      rhs_reg = TMP;
+      __ LoadConst64(rhs_reg, rhs_imm);
+    }
+    switch (cond) {
+      case kCondEQ:
+        __ Beqc(lhs, rhs_reg, label);
+        break;
+      case kCondNE:
+        __ Bnec(lhs, rhs_reg, label);
+        break;
+      case kCondLT:
+        __ Bltc(lhs, rhs_reg, label);
+        break;
+      case kCondGE:
+        __ Bgec(lhs, rhs_reg, label);
+        break;
+      case kCondLE:
+        __ Bgec(rhs_reg, lhs, label);
+        break;
+      case kCondGT:
+        __ Bltc(rhs_reg, lhs, label);
+        break;
+      case kCondB:
+        __ Bltuc(lhs, rhs_reg, label);
+        break;
+      case kCondAE:
+        __ Bgeuc(lhs, rhs_reg, label);
+        break;
+      case kCondBE:
+        __ Bgeuc(rhs_reg, lhs, label);
+        break;
+      case kCondA:
+        __ Bltuc(rhs_reg, lhs, label);
+        break;
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond,
+                                                                bool gt_bias,
+                                                                Primitive::Type type,
+                                                                LocationSummary* locations,
+                                                                Mips64Label* label) {
+  FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>();
+  FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>();
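+  // The bias decides how NaN behaves: under gt_bias the LT/LE cases use ordered
+  // compares so a NaN input does not take the branch, while the GT/GE cases use
+  // unordered compares so it does; without gt_bias the choices are mirrored.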
+  if (type == Primitive::kPrimFloat) {
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqS(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeS(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleS(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleS(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeS(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondNE:
+        __ CmpEqD(FTMP, lhs, rhs);
+        __ Bc1eqz(FTMP, label);
+        break;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUltD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeD(FTMP, lhs, rhs);
+        } else {
+          __ CmpUleD(FTMP, lhs, rhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLtD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleD(FTMP, rhs, lhs);
+        } else {
+          __ CmpLeD(FTMP, rhs, lhs);
+        }
+        __ Bc1nez(FTMP, label);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+    }
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
                                                            size_t condition_input_index,
                                                            Mips64Label* true_target,
@@ -2420,97 +2679,27 @@
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
     HCondition* condition = cond->AsCondition();
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    LocationSummary* locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    Mips64Label* branch_target = true_target;
 
-    GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
-    Location rhs_location = condition->GetLocations()->InAt(1);
-    GpuRegister rhs_reg = ZERO;
-    int32_t rhs_imm = 0;
-    bool use_imm = rhs_location.IsConstant();
-    if (use_imm) {
-      rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
-    } else {
-      rhs_reg = rhs_location.AsRegister<GpuRegister>();
-    }
-
-    IfCondition if_cond;
-    Mips64Label* non_fallthrough_target;
     if (true_target == nullptr) {
       if_cond = condition->GetOppositeCondition();
-      non_fallthrough_target = false_target;
-    } else {
-      if_cond = condition->GetCondition();
-      non_fallthrough_target = true_target;
+      branch_target = false_target;
     }
 
-    if (use_imm && rhs_imm == 0) {
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqzc(lhs, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bnezc(lhs, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Bltzc(lhs, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bgezc(lhs, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Blezc(lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Bgtzc(lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          break;  // always false
-        case kCondBE:
-          __ Beqzc(lhs, non_fallthrough_target);  // <= 0 if zero
-          break;
-        case kCondA:
-          __ Bnezc(lhs, non_fallthrough_target);  // > 0 if non-zero
-          break;
-        case kCondAE:
-          __ Bc(non_fallthrough_target);  // always true
-          break;
-      }
-    } else {
-      if (use_imm) {
-        rhs_reg = TMP;
-        __ LoadConst32(rhs_reg, rhs_imm);
-      }
-      switch (if_cond) {
-        case kCondEQ:
-          __ Beqc(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondNE:
-          __ Bnec(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLT:
-          __ Bltc(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondGE:
-          __ Bgec(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondLE:
-          __ Bgec(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondGT:
-          __ Bltc(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondB:
-          __ Bltuc(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondAE:
-          __ Bgeuc(lhs, rhs_reg, non_fallthrough_target);
-          break;
-        case kCondBE:
-          __ Bgeuc(rhs_reg, lhs, non_fallthrough_target);
-          break;
-        case kCondA:
-          __ Bltuc(rhs_reg, lhs, non_fallthrough_target);
-          break;
-      }
+    switch (type) {
+      default:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+        break;
+      case Primitive::kPrimLong:
+        GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+        break;
     }
   }
 
@@ -2556,6 +2745,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
                                             const FieldInfo& field_info ATTRIBUTE_UNUSED) {
   LocationSummary* locations =
@@ -3886,83 +4083,83 @@
 }
 
 void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) {
@@ -3991,17 +4188,34 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to default if the index is negative.
+  // Note: We don't check the case where index is positive while value < lower_bound, because
+  // then index >= num_entries must hold and the default is reached anyway; this saves a branch.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
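+  // As in the 32-bit MIPS lowering, temp_reg tracks value - case_value[last_index], so
+  // each iteration resolves two case values with one Addiu and two compact branches.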
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // Handle the last remaining case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 85e3a4a..60ff96d 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -189,6 +189,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -224,6 +225,7 @@
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -237,6 +239,16 @@
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations);
+  void GenerateIntLongCompareAndBranch(IfCondition cond,
+                                       bool is64bit,
+                                       LocationSummary* locations,
+                                       Mips64Label* label);
+  void GenerateFpCompareAndBranch(IfCondition cond,
+                                  bool gt_bias,
+                                  Primitive::Type type,
+                                  LocationSummary* locations,
+                                  Mips64Label* label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index bc3256e..fd18917 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
-
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -1555,7 +1554,7 @@
 
     Location lhs = condition->GetLocations()->InAt(0);
     Location rhs = condition->GetLocations()->InAt(1);
-    // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition).
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
     } else if (rhs.IsConstant()) {
@@ -1617,6 +1616,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderX86::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1660,7 +1667,7 @@
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1693,7 +1700,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1754,83 +1761,83 @@
 }
 
 void LocationsBuilderX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) {
@@ -4157,7 +4164,7 @@
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
@@ -6752,31 +6759,67 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+                                                              int32_t lower_bound,
+                                                              uint32_t num_entries,
+                                                              HBasicBlock* switch_block,
+                                                              HBasicBlock* default_block) {
+  // Figure out the correct compare values and jump conditions.
+  // Handle the first compare/branch as a special case because it might
+  // jump to the default case.
+  DCHECK_GT(num_entries, 2u);
+  Condition first_condition;
+  uint32_t index;
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  if (lower_bound != 0) {
+    first_condition = kLess;
+    __ cmpl(value_reg, Immediate(lower_bound));
+    __ j(first_condition, codegen_->GetLabelOf(default_block));
+    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+    index = 1;
+  } else {
+    // Handle all the compare/jumps below.
+    first_condition = kBelow;
+    index = 0;
+  }
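+  // With a zero lower_bound, kBelow treats a negative input as a large unsigned
+  // value, so it fails every range check below and reaches the default block
+  // without needing a separate bounds test.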
+
+  // Handle the rest of the compare/jumps.
+  for (; index + 1 < num_entries; index += 2) {
+    int32_t compare_to_value = lower_bound + index + 1;
+    __ cmpl(value_reg, Immediate(compare_to_value));
+    // Jump to successors[index] if value < case_value[index + 1].
+    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+    // Jump to successors[index + 1] if value == case_value[index + 1].
+    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+  }
+
+  if (index != num_entries) {
+    // There is an odd number of entries. Handle the last one.
+    DCHECK_EQ(index + 1, num_entries);
+    __ cmpl(value_reg, Immediate(lower_bound + index));
+    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+  GenPackedSwitchWithCompares(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_instr->GetBlock(),
+                              switch_instr->GetDefaultBlock());
+}
+
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6791,11 +6834,20 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
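+  // Should we generate smaller inline compare/jumps?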
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_instr->GetBlock(),
+                                default_block);
+    return;
+  }
+
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6807,7 +6859,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1);
+  DCHECK_GE(num_entries, 1u);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7c292fa..3d34317 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 
+#include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -166,6 +167,7 @@
  private:
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void HandleInvoke(HInvoke* invoke);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -195,6 +197,11 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
+  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+  // table version generates 7 instructions and num_entries literals. The compare/jump
+  // sequence generates less code/data when num_entries is small.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
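+  // At the threshold of 5 entries that is roughly 8 compare/jump instructions
+  // versus 7 instructions plus 5 jump-table literals.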
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -207,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
@@ -269,6 +277,11 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -496,6 +509,19 @@
   // artReadBarrierForRootSlow.
   void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
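+  // On common x86 microarchitectures the locked no-op add is cheaper than mfence,
+  // which is why it is used when PrefersLockedAddSynchronization() is set.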
+  void MemoryFence(bool non_temporal = false) {
+    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 92cef5f..ffd8c42 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence generates less code/data when num_entries is small.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -1596,6 +1600,14 @@
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1639,7 +1651,7 @@
 void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) {
 }
 
-void LocationsBuilderX86_64::VisitCondition(HCondition* cond) {
+void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
   // Handle the long/FP comparisons made in instruction simplification.
@@ -1663,7 +1675,7 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) {
+void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
   if (!cond->NeedsMaterialization()) {
     return;
   }
@@ -1761,83 +1773,83 @@
 }
 
 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
@@ -4029,7 +4041,7 @@
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
@@ -6331,11 +6343,58 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Should we generate smaller inline compare/jumps?
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    // Figure out the correct compare values and jump conditions.
+    // Handle the first compare/branch as a special case because it might
+    // jump to the default case.
+    DCHECK_GT(num_entries, 2u);
+    Condition first_condition;
+    uint32_t index;
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    if (lower_bound != 0) {
+      first_condition = kLess;
+      __ cmpl(value_reg_in, Immediate(lower_bound));
+      __ j(first_condition, codegen_->GetLabelOf(default_block));
+      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+      index = 1;
+    } else {
+      // Handle all the compare/jumps below.
+      first_condition = kBelow;
+      index = 0;
+    }
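+    // As on x86, kBelow makes a negative input (a huge unsigned value) fail every
+    // range check and fall through to the default block.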
+
+    // Handle the rest of the compare/jumps.
+    for (; index + 1 < num_entries; index += 2) {
+      int32_t compare_to_value = lower_bound + index + 1;
+      __ cmpl(value_reg_in, Immediate(compare_to_value));
+      // Jump to successors[index] if value < case_value[index + 1].
+      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+      // Jump to successors[index + 1] if value == case_value[index + 1].
+      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+    }
+
+    if (index != num_entries) {
+      // There is an odd number of entries. Handle the last one.
+      DCHECK_EQ(index + 1, num_entries);
+      __ cmpl(value_reg_in, Immediate(lower_bound + index));
+      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
+    }
+    return;
+  }
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -6346,7 +6405,6 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dda9ea2..9995416 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 
+#include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -171,6 +172,7 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction);
@@ -212,6 +214,7 @@
   void DivByPowerOfTwo(HDiv* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void HandleCondition(HCondition* condition);
   void HandleShift(HBinaryOperation* operation);
 
   void HandleFieldSet(HInstruction* instruction,
@@ -479,6 +482,18 @@
                           int64_t v,
                           HInstruction* instruction);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack. The locked add should not be used for
+  // ordering non-temporal stores.
+  void MemoryFence(bool force_mfence = false) {
+    if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
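A minimal sketch (not ART's assembler API) of the two fences MemoryFence() chooses between, written as GCC-style inline assembly. Both order prior stores before subsequent loads (StoreLoad); on many x86 cores the locked RMW on the stack top is cheaper than MFENCE but, as the comment above notes, it does not order non-temporal stores:

inline void FenceViaMfence() {
  __asm__ __volatile__("mfence" ::: "memory");
}

inline void FenceViaLockedAdd() {
  // Touches (but does not change) the top of the stack.
  __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
}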
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 02e5dab..67ff87a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -165,6 +165,7 @@
       if (!inst->HasSideEffects()
           && !inst->CanThrow()
           && !inst->IsSuspendCheck()
+          && !inst->IsNativeDebugInfo()
           // If we added an explicit barrier then we should keep it.
           && !inst->IsMemoryBarrier()
           && !inst->IsParameterValue()
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index de60cf2..78cb7d4 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -28,7 +28,7 @@
 TEST(GVNTest, LocalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -113,7 +113,7 @@
 TEST(GVNTest, GlobalFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -196,7 +196,7 @@
 TEST(GVNTest, LoopFieldElimination) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   HGraph* graph = CreateGraph(&allocator);
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -319,7 +319,7 @@
 TEST(GVNTest, LoopSideEffects) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
 
   static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC();
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 67097de..c504ded 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -777,13 +777,6 @@
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
   // Try to fold an HCompare into this HCondition.
 
-  // This simplification is currently supported on x86, x86_64, ARM and ARM64.
-  // TODO: Implement it for MIPS64.
-  InstructionSet instruction_set = GetGraph()->GetInstructionSet();
-  if (instruction_set == kMips64) {
-    return;
-  }
-
   HInstruction* left = condition->GetLeft();
   HInstruction* right = condition->GetRight();
   // We can only replace an HCondition which compares a Compare to 0.
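A hedged C++ analog (not compiler IR) of the Compare-into-Condition fold this change now enables on every back end, including MIPS64: a three-way compare tested against zero collapses into a direct comparison of the original operands.

inline bool GreaterOrEqual(long a, long b) {
  int compare = (a < b) ? -1 : ((a > b) ? 1 : 0);  // HCompare(a, b)
  // The simplifier folds the (Compare, Condition-vs-0) pair into "a >= b".
  return compare >= 0;                             // HCondition(compare, 0)
}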
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index e8181bb..4683aee 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -825,8 +825,15 @@
   Label loop_head;
   __ Bind(&loop_head);
 
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
   __ ldrex(tmp_lo, tmp_ptr);
-  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
 
@@ -852,15 +859,17 @@
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS below).
+  //
+  // Also, the UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
+  // off temporarily as a quick fix (b/26204023).
   //
-  // TODO(rpl): Fix it and turn it back on.
-  //
-  // TODO(rpl): Also, we should investigate whether we need a read
-  // barrier in the generated code.
-  if (kPoisonHeapReferences) {
+  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
     return;
   }
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6b34daa..9f6863c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1031,10 +1031,15 @@
   } else {
     __ Dmb(InnerShareable, BarrierWrites);
     __ Bind(&loop_head);
-    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
-    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+    // the reference stored in the object before attempting the CAS,
+    // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+    // implementation.
+    //
     // Note that this code is not (yet) used when read barriers are
     // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
+    DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
+    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1057,15 +1062,17 @@
   CreateIntIntIntIntIntToInt(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS below).
+  //
+  // Also, the UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
+  // off temporarily as a quick fix (b/26204023).
   //
-  // TODO(rpl): Fix it and turn it back on.
-  //
-  // TODO(rpl): Also, we should investigate whether we need a read
-  // barrier in the generated code.
-  if (kPoisonHeapReferences) {
+  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+  if (kEmitCompilerReadBarrier || kPoisonHeapReferences) {
     return;
   }
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8aa7d9f..8b45ea7 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1299,6 +1299,8 @@
   if (type == Primitive::kPrimLong) {
     __ Lld(out, TMP);
   } else {
+    // Note: We will need a read barrier here, when read barrier
+    // support is added to the MIPS64 back end.
     __ Ll(out, TMP);
   }
   __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index fd454d8..8019062 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
@@ -2085,6 +2085,17 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented.
+  //
+  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -2136,6 +2147,13 @@
       __ PoisonHeapReference(value);
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -2145,11 +2163,8 @@
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
-    // In the case of the `UnsafeCASObject` intrinsic, accessing an
-    // object in the heap with LOCK CMPXCHG does not require a read
-    // barrier, as we do not keep a reference to this heap location.
-    // However, if heap poisoning is enabled, we need to unpoison the
-    // values that were poisoned earlier.
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value` has been moved to a temporary register, no need to
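As the rewritten comment above says, LOCK CMPXCHG itself provides full barrier semantics, so no separate fence is emitted around the CAS; SETZ/MOVZX then turn the resulting ZF into the intrinsic's boolean result. A hedged sketch of what the sequence computes, using a GCC/Clang builtin purely as a stand-in for the emitted instructions:

#include <cstdint>

bool Cas32(volatile int32_t* addr, int32_t expected, int32_t desired) {
  // Compiles to LOCK CMPXCHG + SETZ/MOVZX on x86: ZF is set on success and
  // materialized as the boolean result; the LOCK prefix is a full barrier.
  return __sync_bool_compare_and_swap(addr, expected, desired);
}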
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ce737e3..aa1c109 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2080,7 +2080,7 @@
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
@@ -2150,6 +2150,17 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented.
+  //
+  // TODO(rpl): Implement a read barrier in GenCAS below and re-enable
+  // this intrinsic.
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
   CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
 }
 
@@ -2200,6 +2211,13 @@
       __ PoisonHeapReference(CpuRegister(value_reg));
     }
 
+    // TODO: Add a read barrier for the reference stored in the object
+    // before attempting the CAS, similar to the one in the
+    // art::Unsafe_compareAndSwapObject JNI implementation.
+    //
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
+    DCHECK(!kEmitCompilerReadBarrier);
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
@@ -2209,11 +2227,8 @@
     __ setcc(kZero, out);
     __ movzxb(out, out);
 
-    // In the case of the `UnsafeCASObject` intrinsic, accessing an
-    // object in the heap with LOCK CMPXCHG does not require a read
-    // barrier, as we do not keep a reference to this heap location.
-    // However, if heap poisoning is enabled, we need to unpoison the
-    // values that were poisoned earlier.
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value_reg` has been moved to a temporary register, no need
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 2bb769a..9ad003c 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -107,7 +107,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with different types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
@@ -134,7 +134,7 @@
   BuildLoop();
 
   // Populate the loop with instructions: set/get field with same types.
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_,
                                                                 Primitive::kPrimLong,
                                                                 MemberOffset(10),
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index adde004..727f2bb 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,10 +119,16 @@
       : ref_info_(ref_info),
         offset_(offset),
         index_(index),
-        declaring_class_def_index_(declaring_class_def_index) {
+        declaring_class_def_index_(declaring_class_def_index),
+        value_killed_by_loop_side_effects_(true) {
     DCHECK(ref_info != nullptr);
     DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
            (offset != kInvalidFieldOffset && index == nullptr));
+    if (ref_info->IsSingleton() && !IsArrayElement()) {
+      // Assume this location's value cannot be killed by loop side effects
+      // until proven otherwise.
+      value_killed_by_loop_side_effects_ = false;
+    }
   }
 
   ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -139,11 +145,22 @@
     return index_ != nullptr;
   }
 
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
+  }
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
+  }
+
  private:
   ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
   const size_t offset_;                // offset of static/instance field.
   HInstruction* const index_;          // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -370,13 +387,13 @@
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
     if (field_info.IsVolatile()) {
       has_volatile_ = true;
     }
     const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
     const size_t offset = field_info.GetFieldOffset().SizeValue();
-    GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -390,8 +407,11 @@
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
+    if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
+      location->SetValueKilledByLoopSideEffects(true);
+    }
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
@@ -565,23 +585,26 @@
     HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
     ArenaVector<HInstruction*>& pre_header_heap_values =
         heap_values_for_[pre_header->GetBlockId()];
+    // Inherit the values from pre-header.
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      heap_values[i] = pre_header_heap_values[i];
+    }
+
     // We do a single pass in reverse post order. For loops, use the side effects as a hint
     // to see if the heap values should be killed.
     if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-      for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
-        // heap value is killed by loop side effects, need to keep the last store.
-        KeepIfIsStore(pre_header_heap_values[i]);
-      }
-      if (kIsDebugBuild) {
-        // heap_values should all be kUnknownHeapValue that it is inited with.
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
-        }
-      }
-    } else {
-      // Inherit the values from pre-header.
       for (size_t i = 0; i < heap_values.size(); i++) {
-        heap_values[i] = pre_header_heap_values[i];
+        HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+        ReferenceInfo* ref_info = location->GetReferenceInfo();
+        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
+          // heap value is killed by loop side effects (stored into directly, or due to
+          // aliasing).
+          KeepIfIsStore(pre_header_heap_values[i]);
+          heap_values[i] = kUnknownHeapValue;
+        } else {
+          // A singleton's field that's not stored into inside a loop is invariant throughout
+          // the loop.
+        }
       }
     }
   }
@@ -655,6 +678,16 @@
     }
   }
 
+  static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) ||
+           (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt);
+  }
+
+  static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) ||
+           (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong);
+  }
+
   void VisitGetLocation(HInstruction* instruction,
                         HInstruction* ref,
                         size_t offset,
@@ -686,7 +719,8 @@
     if ((heap_value != kUnknownHeapValue) &&
         // Keep the load due to possible I/F, J/D array aliasing.
         // See b/22538329 for details.
-        (heap_value->GetType() == instruction->GetType())) {
+        !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) &&
+        !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) {
       removed_loads_.push_back(instruction);
       substitute_instructions_for_loads_.push_back(heap_value);
       TryRemovingNullCheck(instruction);
@@ -751,6 +785,9 @@
         if (loop_info != nullptr) {
           // instruction is a store in the loop, so the loop must write.
           DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+          // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
+          DCHECK(!ref_info->IsSingleton() ||
+                 heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
 
           if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
             DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
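The new value_killed_by_loop_side_effects_ bit lets loads from a singleton's field stay loop invariant unless the field is actually stored to inside the loop, instead of killing every heap value whenever the loop writes anything. A hedged C++ analog of the pattern this targets (names hypothetical, not ART code):

struct Point { int x; int y; };

int SumX(int n, int* out) {
  Point* p = new Point{42, 0};  // Singleton: `p` is the only reference and never escapes.
  int sum = 0;
  for (int i = 0; i < n; ++i) {
    out[i] = i;                 // Loop side effect, but cannot alias p->x.
    sum += p->x;                // Invariant load: now eliminable.
  }
  delete p;
  return sum;
}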
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 926bc15..a37298c 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -777,6 +777,10 @@
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
 }
 
+HInstruction::InstructionKind HInstruction::GetKind() const {
+  return GetKindInternal();
+}
+
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
   HInstruction* next = GetNext();
   while (next != nullptr && next->IsParallelMove()) {
@@ -960,7 +964,7 @@
   visitor->Visit##name(this);                                                  \
 }
 
-FOR_EACH_INSTRUCTION(DEFINE_ACCEPT)
+FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT)
 
 #undef DEFINE_ACCEPT
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 1f8ef47..db3e969 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1034,7 +1034,6 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
-  M(Condition, BinaryOperation)                                         \
   M(CurrentMethod, Instruction)                                         \
   M(Deoptimize, Instruction)                                            \
   M(Div, BinaryOperation)                                               \
@@ -1067,6 +1066,7 @@
   M(MemoryBarrier, Instruction)                                         \
   M(MonitorOperation, Instruction)                                      \
   M(Mul, BinaryOperation)                                               \
+  M(NativeDebugInfo, Instruction)                                       \
   M(Neg, UnaryOperation)                                                \
   M(NewArray, Instruction)                                              \
   M(NewInstance, Instruction)                                           \
@@ -1141,27 +1141,34 @@
   FOR_EACH_CONCRETE_INSTRUCTION_X86(M)                                  \
   FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
 
-#define FOR_EACH_INSTRUCTION(M)                                         \
-  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+#define FOR_EACH_ABSTRACT_INSTRUCTION(M)                                \
+  M(Condition, BinaryOperation)                                         \
   M(Constant, Instruction)                                              \
   M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
   M(Invoke, Instruction)
 
+#define FOR_EACH_INSTRUCTION(M)                                         \
+  FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
+  FOR_EACH_ABSTRACT_INSTRUCTION(M)
+
 #define FORWARD_DECLARATION(type, super) class H##type;
 FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
 #undef FORWARD_DECLARATION
 
 #define DECLARE_INSTRUCTION(type)                                       \
-  InstructionKind GetKind() const OVERRIDE { return k##type; }          \
+  InstructionKind GetKindInternal() const OVERRIDE { return k##type; }  \
   const char* DebugName() const OVERRIDE { return #type; }              \
-  const H##type* As##type() const OVERRIDE { return this; }             \
-  H##type* As##type() OVERRIDE { return this; }                         \
   bool InstructionTypeEquals(HInstruction* other) const OVERRIDE {      \
     return other->Is##type();                                           \
   }                                                                     \
   void Accept(HGraphVisitor* visitor) OVERRIDE
 
+#define DECLARE_ABSTRACT_INSTRUCTION(type)                              \
+  bool Is##type() const { return As##type() != nullptr; }               \
+  const H##type* As##type() const { return this; }                      \
+  H##type* As##type() { return this; }
+
 template <typename T> class HUseList;
 
 template <typename T>
@@ -1972,11 +1979,18 @@
   void MoveBeforeFirstUserAndOutOfLoops();
 
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  bool Is##type() const;                                                       \
+  const H##type* As##type() const;                                             \
+  H##type* As##type();
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
   virtual H##type* As##type() { return nullptr; }
-
-  FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+  FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
   // Returns whether the instruction can be moved within the graph.
@@ -1999,7 +2013,12 @@
   // 2) Their inputs are identical.
   bool Equals(HInstruction* other) const;
 
-  virtual InstructionKind GetKind() const = 0;
+  // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
+  // is adopted and implemented by our C++ compiler(s). For now, we need to hide
+  // the virtual function because __attribute__((__pure__)) does not enforce
+  // its strong requirement on virtual functions, preventing optimizations.
+  InstructionKind GetKind() const PURE;
+  virtual InstructionKind GetKindInternal() const = 0;
 
   virtual size_t ComputeHashCode() const {
     size_t result = GetKind();
@@ -2297,7 +2316,7 @@
 
   virtual uint64_t GetValueAsUint64() const = 0;
 
-  DECLARE_INSTRUCTION(Constant);
+  DECLARE_ABSTRACT_INSTRUCTION(Constant);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HConstant);
@@ -2558,7 +2577,7 @@
   virtual HConstant* Evaluate(HIntConstant* x) const = 0;
   virtual HConstant* Evaluate(HLongConstant* x) const = 0;
 
-  DECLARE_INSTRUCTION(UnaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HUnaryOperation);
@@ -2651,7 +2670,7 @@
   // one. Otherwise it returns null.
   HInstruction* GetLeastConstantLeft() const;
 
-  DECLARE_INSTRUCTION(BinaryOperation);
+  DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation);
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HBinaryOperation);
@@ -2679,7 +2698,7 @@
   // `instruction`, and disregard moves in between.
   bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const;
 
-  DECLARE_INSTRUCTION(Condition);
+  DECLARE_ABSTRACT_INSTRUCTION(Condition);
 
   virtual IfCondition GetCondition() const = 0;
 
@@ -3288,7 +3307,7 @@
 
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
-  DECLARE_INSTRUCTION(Invoke);
+  DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
  protected:
   HInvoke(ArenaAllocator* arena,
@@ -4854,6 +4873,23 @@
   DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
 };
 
+// Pseudo-instruction which provides the native debugger with mapping information.
+// It ensures that we can generate line number and local variable information at this point.
+class HNativeDebugInfo : public HTemplateInstruction<0> {
+ public:
+  explicit HNativeDebugInfo(uint32_t dex_pc)
+      : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {}
+
+  bool NeedsEnvironment() const OVERRIDE {
+    return true;
+  }
+
+  DECLARE_INSTRUCTION(NativeDebugInfo);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo);
+};
+
 /**
  * Instruction to load a Class object.
  */
@@ -5869,6 +5905,18 @@
   return &lhs == &rhs;
 }
 
+#define INSTRUCTION_TYPE_CHECK(type, super)                                    \
+  inline bool HInstruction::Is##type() const { return GetKind() == k##type; }  \
+  inline const H##type* HInstruction::As##type() const {                       \
+    return Is##type() ? down_cast<const H##type*>(this) : nullptr;             \
+  }                                                                            \
+  inline H##type* HInstruction::As##type() {                                   \
+    return Is##type() ? static_cast<H##type*>(this) : nullptr;                 \
+  }
+
+  FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
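The net effect of the nodes.h changes is that concrete-type queries no longer go through a virtual As##type() per kind: Is##type() becomes a single enum comparison against the PURE, non-virtual GetKind(), and As##type() a checked cast. A hedged mini-model of that expansion (names hypothetical, not ART code):

#include <cassert>

enum InstructionKind { kAdd, kMul };

struct HInstr {
  explicit HInstr(InstructionKind k) : kind_(k) {}
  InstructionKind GetKind() const { return kind_; }  // PURE in the real code.
  bool IsAdd() const { return GetKind() == kAdd; }   // One compare, no vcall.
 private:
  InstructionKind kind_;
};

struct HAdd : HInstr { HAdd() : HInstr(kAdd) {} };

int main() {
  HAdd add;
  assert(add.IsAdd());
  return 0;
}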
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 30bcf19..176c50c 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -169,7 +169,7 @@
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
-        i = 0;
+        i = -1;
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
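A hedged sketch of the restart idiom the `i = -1` fix relies on, assuming the enclosing loop increments an unsigned index (names hypothetical): assigning -1 wraps, so the next ++i lands on 0 and the scan restarts from the first move, whereas the previous `i = 0` resumed at index 1 after the increment and skipped the first move.

#include <cstddef>
#include <vector>

void ScanWithRestart(const std::vector<int>& moves) {
  for (size_t i = 0; i < moves.size(); ++i) {
    bool restart_needed = false;  // Set when a swap resolves a cycle.
    if (restart_needed) {
      i = static_cast<size_t>(-1);  // ++i wraps back to 0.
    }
  }
}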
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 46e6f3e..5e8fe37 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -609,4 +609,36 @@
   }
 }
 
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TypeParam resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterLocation(0),
+        Location::RegisterLocation(3),
+        Primitive::kPrimInt,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterPairLocation(2, 3),
+        Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterLocation(7),
+        Location::RegisterLocation(2),
+        Primitive::kPrimInt,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    if (TestFixture::has_swap) {
+      ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str());
+    } else {
+      ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)",
+          resolver.GetMessage().c_str());
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b383f1e..a385448 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -79,6 +80,10 @@
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
     InitializePCRelativeBasePointer();
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 080f970..8706854 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -472,7 +472,7 @@
                                   HInstruction** input2) {
   HGraph* graph = CreateGraph(allocator);
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
   HInstruction* parameter = new (allocator) HParameterValue(
@@ -624,7 +624,7 @@
                                 HInstruction** field,
                                 HInstruction** ret) {
   HGraph* graph = CreateGraph(allocator);
-  NullHandle<mirror::DexCache> dex_cache;
+  ScopedNullHandle<mirror::DexCache> dex_cache;
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 98a1a8f..b79c2f0 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -878,7 +878,15 @@
                                      Register rn,
                                      Opcode opcode,
                                      uint32_t immediate,
+                                     SetCc set_cc,
                                      ShifterOperand* shifter_op) = 0;
+  bool ShifterOperandCanHold(Register rd,
+                             Register rn,
+                             Opcode opcode,
+                             uint32_t immediate,
+                             ShifterOperand* shifter_op) {
+    return ShifterOperandCanHold(rd, rn, opcode, immediate, kCcDontCare, shifter_op);
+  }
 
   virtual bool ShifterOperandCanAlwaysHold(uint32_t immediate) = 0;
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a7dbacd..ebca25b 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -57,6 +57,7 @@
                                            Register rn ATTRIBUTE_UNUSED,
                                            Opcode opcode ATTRIBUTE_UNUSED,
                                            uint32_t immediate,
+                                           SetCc set_cc ATTRIBUTE_UNUSED,
                                            ShifterOperand* shifter_op) {
   return ShifterOperandCanHoldArm32(immediate, shifter_op);
 }
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index ce3a872..bf332fe 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -297,7 +297,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index cdeb443..f341030 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -500,6 +500,7 @@
                                             Register rn ATTRIBUTE_UNUSED,
                                             Opcode opcode,
                                             uint32_t immediate,
+                                            SetCc set_cc,
                                             ShifterOperand* shifter_op) {
   shifter_op->type_ = ShifterOperand::kImmediate;
   shifter_op->immed_ = immediate;
@@ -508,7 +509,8 @@
   switch (opcode) {
     case ADD:
     case SUB:
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+      // Less than (or equal to) 12 bits can be done if we don't need to set condition codes.
+      if (immediate < (1 << 12) && set_cc != kCcSet) {
         return true;
       }
       return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
@@ -1239,7 +1241,10 @@
       // The only thumb1 instructions with a register and an immediate are ADD and SUB
       // with a 3-bit immediate, and RSB with zero immediate.
       if (opcode == ADD || opcode == SUB) {
-        if (!IsUint<3>(so.GetImmediate())) {
+        if ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet) {
+          return true;  // Cannot match "setflags".
+        }
+        if (!IsUint<3>(so.GetImmediate()) && !IsUint<3>(-so.GetImmediate())) {
           return true;
         }
       } else {
@@ -1249,8 +1254,12 @@
       // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
       if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
         return true;
+      } else if (opcode != CMP && ((cond == AL) ? set_cc == kCcKeep : set_cc == kCcSet)) {
+        return true;  // Cannot match "setflags" for ADD, SUB or MOV.
       } else {
-        if (!IsUint<8>(so.GetImmediate())) {
+        // For ADD and SUB, also allow a negative 8-bit immediate as we emit the opposite opcode.
+        if (!IsUint<8>(so.GetImmediate()) &&
+            (opcode == MOV || opcode == CMP || !IsUint<8>(-so.GetImmediate()))) {
           return true;
         }
       }
@@ -1602,12 +1611,18 @@
   uint8_t rn_shift = 3;
   uint8_t immediate_shift = 0;
   bool use_immediate = false;
-  uint32_t immediate = 0;  // Should be at most 9 bits but keep the full immediate for CHECKs.
+  uint32_t immediate = 0;  // Should be at most 10 bits but keep the full immediate for CHECKs.
   uint8_t thumb_opcode;
 
   if (so.IsImmediate()) {
     use_immediate = true;
     immediate = so.GetImmediate();
+    if (!IsUint<10>(immediate)) {
+      // Flip ADD/SUB.
+      opcode = (opcode == ADD) ? SUB : ADD;
+      immediate = -immediate;
+      DCHECK(IsUint<10>(immediate));  // More stringent checks below.
+    }
   }
 
   switch (opcode) {
@@ -1644,7 +1659,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 3U /* 0b11 */;
           opcode_shift = 12;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1658,7 +1673,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 5U /* 0b101 */;
           opcode_shift = 11;
-          CHECK_LT(immediate, (1u << 10));
+          CHECK(IsUint<10>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rn from instruction.
@@ -1668,11 +1683,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 14U /* 0b01110 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 6U /* 0b110 */;
           rd_shift = 8;
@@ -1702,7 +1719,7 @@
           dp_opcode = 2U /* 0b10 */;
           thumb_opcode = 0x61 /* 0b1100001 */;
           opcode_shift = 7;
-          CHECK_LT(immediate, (1u << 9));
+          CHECK(IsUint<9>(immediate));
           CHECK_ALIGNED(immediate, 4);
 
           // Remove rd and rn from instruction by orring it with immed and clearing bits.
@@ -1713,11 +1730,13 @@
           immediate >>= 2;
         } else if (rn != rd) {
           // Must use T1.
+          CHECK(IsUint<3>(immediate));
           opcode_shift = 9;
           thumb_opcode = 15U /* 0b01111 */;
           immediate_shift = 6;
         } else {
           // T2 encoding.
+          CHECK(IsUint<8>(immediate));
           opcode_shift = 11;
           thumb_opcode = 7U /* 0b111 */;
           rd_shift = 8;
@@ -3401,25 +3420,30 @@
   // positive values and sub for negative ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, rn, ADD, value, set_cc, &shifter_op)) {
     add(rd, rn, shifter_op, cond, set_cc);
-  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, set_cc, &shifter_op)) {
     sub(rd, rn, shifter_op, cond, set_cc);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond, kCcKeep);
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
-    } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond, kCcKeep);
-      sub(rd, rn, ShifterOperand(IP), cond, set_cc);
+    // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP.
+    Register temp = (rd != rn) ? rd : IP;
+    if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) {
+      mvn(temp, shifter_op, cond, kCcKeep);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
+    } else if (High16Bits(-value) == 0) {
+      movw(temp, Low16Bits(-value), cond);
+      sub(rd, rn, ShifterOperand(temp), cond, set_cc);
     } else {
-      movw(IP, Low16Bits(value), cond);
+      movw(temp, Low16Bits(value), cond);
       uint16_t value_high = High16Bits(value);
       if (value_high != 0) {
-        movt(IP, value_high, cond);
+        movt(temp, value_high, cond);
       }
-      add(rd, rn, ShifterOperand(IP), cond, set_cc);
+      add(rd, rn, ShifterOperand(temp), cond, set_cc);
     }
   }
 }
@@ -3429,9 +3453,9 @@
   // positive values and sub for negative ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) {
     cmp(rn, shifter_op, cond);
-  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) {
     cmn(rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
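A hedged sketch (helper is hypothetical) of the opcode flip introduced above: an immediate that is out of range for the 10-bit ADD encodings may fit after negating the value and switching ADD to SUB (or vice versa), e.g. ADD rd, rn, #-8 is emitted as SUB rd, rn, #8.

#include <cstdint>

enum Opcode16 { ADD16, SUB16 };

inline void FlipAddSubIfNeeded(Opcode16* opcode, uint32_t* immediate) {
  if ((*immediate >> 10) != 0) {     // !IsUint<10>(immediate)
    *opcode = (*opcode == ADD16) ? SUB16 : ADD16;
    *immediate = 0u - *immediate;    // Unsigned negate; re-checked by encodings later.
  }
}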
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 9aeece8..bf07b2d 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -342,7 +342,9 @@
                              Register rn,
                              Opcode opcode,
                              uint32_t immediate,
+                             SetCc set_cc,
                              ShifterOperand* shifter_op) OVERRIDE;
+  using ArmAssembler::ShifterOperandCanHold;  // Don't hide the non-virtual override.
 
   bool ShifterOperandCanAlwaysHold(uint32_t immediate) OVERRIDE;
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 5ae2cc2..0ef0dc1 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -135,7 +135,8 @@
     toolsdir.c_str(), filename);
   if (kPrintResults) {
     // Print the results only, don't check. This is used to generate new output for inserting
-    // into the .inc file.
+    // into the .inc file, so let's add the appropriate prefix/suffix needed in the C++ code.
+    strcat(cmd, " | sed '-es/^/  \"/' | sed '-es/$/\\\\n\",/'");
     int cmd_result3 = system(cmd);
     ASSERT_EQ(cmd_result3, 0) << strerror(errno);
   } else {
@@ -1379,6 +1380,252 @@
   EmitAndCheck(&assembler, "CompareAndBranch");
 }
 
+TEST(Thumb2AssemblerTest, AddConstant) {
+  arm::Thumb2Assembler assembler;
+
+  // Low registers, Rd != Rn.
+  __ AddConstant(R0, R1, 0);                          // MOV.
+  __ AddConstant(R0, R1, 1);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 7);                          // 16-bit ADDS, encoding T1.
+  __ AddConstant(R0, R1, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7);                         // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn.
+  __ AddConstant(R0, R0, 0);                          // Nothing.
+  __ AddConstant(R1, R1, 1);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8);                          // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255);                        // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8);                         // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255);                       // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002);                   // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // High registers.
+  __ AddConstant(R8, R8, 0);                          // Nothing.
+  __ AddConstant(R8, R1, 1);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 7);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 8);                          // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 255);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 256);                        // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 257);                        // 32-bit ADD, encoding T4.
+  __ AddConstant(R8, R1, 0xfff);                      // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R8, 0x1000);                     // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R8, 0x1001);                     // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002);                     // MOVW+ADD.
+  __ AddConstant(R0, R8, 0xffff);                     // MOVW+ADD.
+  __ AddConstant(R8, R8, 0x10000);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, 0x10001);                    // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R8, 0x10002);                    // MVN+SUB.
+  __ AddConstant(R0, R8, 0x10003);                    // MOVW+MOVT+ADD.
+  __ AddConstant(R8, R8, -1);                         // 32-bit ADD, encoding T3.
+  __ AddConstant(R8, R1, -7);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -8);                         // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -255);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -256);                       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -257);                       // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R8, -0xfff);                     // 32-bit SUB, encoding T4.
+  __ AddConstant(R8, R1, -0x1000);                    // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R8, -0x1001);                    // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002);                    // MOVW+SUB.
+  __ AddConstant(R8, R1, -0xffff);                    // MOVW+SUB.
+  __ AddConstant(R0, R8, -0x10000);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R8, -0x10001);                   // 32-bit SUB, encoding T3.
+  __ AddConstant(R8, R1, -0x10002);                   // MVN+SUB.
+  __ AddConstant(R0, R8, -0x10003);                   // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcKeep.
+  __ AddConstant(R0, R1, 0, AL, kCcKeep);             // MOV.
+  __ AddConstant(R0, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R1, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd == Rn, kCcKeep.
+  __ AddConstant(R0, R0, 0, AL, kCcKeep);             // Nothing.
+  __ AddConstant(R1, R1, 1, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 7, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 8, AL, kCcKeep);             // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 255, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 256, AL, kCcKeep);           // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcKeep);           // 32-bit ADD, encoding T4.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcKeep);         // 32-bit ADD, encoding T4.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcKeep);        // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcKeep);        // MVN+SUB.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcKeep);        // MOVW+ADD.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcKeep);       // 32-bit ADD, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcKeep);       // MVN+SUB.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcKeep);       // MOVW+MOVT+ADD.
+  __ AddConstant(R0, R0, -1, AL, kCcKeep);            // 32-bit ADD, encoding T3.
+  __ AddConstant(R1, R1, -7, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -8, AL, kCcKeep);            // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -255, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -256, AL, kCcKeep);          // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcKeep);          // 32-bit SUB, encoding T4.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcKeep);        // 32-bit SUB, encoding T4.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcKeep);       // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcKeep);       // MVN+ADD.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcKeep);       // MOVW+SUB.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcKeep);      // 32-bit SUB, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcKeep);      // MVN+ADD.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcKeep);      // MOVW+MOVT+ADD.
+
+  // Low registers, Rd != Rn, kCcSet.
+  __ AddConstant(R0, R1, 0, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 1, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 7, AL, kCcSet);              // 16-bit ADDS.
+  __ AddConstant(R0, R1, 8, AL, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 255, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R0, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R1, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R1, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R0, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R1, -1, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -7, AL, kCcSet);             // 16-bit SUBS.
+  __ AddConstant(R0, R1, -8, AL, kCcSet);             // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -255, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R1, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R1, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
+  // Low registers, Rd == Rn, kCcSet.
+  __ AddConstant(R0, R0, 0, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 1, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 7, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 8, AL, kCcSet);              // 16-bit ADDS, encoding T2.
+  __ AddConstant(R0, R0, 255, AL, kCcSet);            // 16-bit ADDS, encoding T2.
+  __ AddConstant(R1, R1, 256, AL, kCcSet);            // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 257, AL, kCcSet);            // MVN+SUBS.
+  __ AddConstant(R1, R1, 0xfff, AL, kCcSet);          // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x1000, AL, kCcSet);         // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x1001, AL, kCcSet);         // MVN+SUBS.
+  __ AddConstant(R0, R0, 0x1002, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R1, R1, 0xffff, AL, kCcSet);         // MOVW+ADDS.
+  __ AddConstant(R0, R0, 0x10000, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R1, R1, 0x10001, AL, kCcSet);        // 32-bit ADDS, encoding T3.
+  __ AddConstant(R0, R0, 0x10002, AL, kCcSet);        // MVN+SUBS.
+  __ AddConstant(R1, R1, 0x10003, AL, kCcSet);        // MOVW+MOVT+ADDS.
+  __ AddConstant(R0, R0, -1, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -7, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -8, AL, kCcSet);             // 16-bit SUBS, encoding T2.
+  __ AddConstant(R1, R1, -255, AL, kCcSet);           // 16-bit SUBS, encoding T2.
+  __ AddConstant(R0, R0, -256, AL, kCcSet);           // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -257, AL, kCcSet);           // MVN+ADDS.
+  __ AddConstant(R0, R0, -0xfff, AL, kCcSet);         // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x1000, AL, kCcSet);        // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x1001, AL, kCcSet);        // MVN+ADDS.
+  __ AddConstant(R1, R1, -0x1002, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R0, R0, -0xffff, AL, kCcSet);        // MOVW+SUBS.
+  __ AddConstant(R1, R1, -0x10000, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R0, R0, -0x10001, AL, kCcSet);       // 32-bit SUBS, encoding T3.
+  __ AddConstant(R1, R1, -0x10002, AL, kCcSet);       // MVN+ADDS.
+  __ AddConstant(R0, R0, -0x10003, AL, kCcSet);       // MOVW+MOVT+ADDS.
+
+  __ it(EQ);
+  __ AddConstant(R0, R1, 1, EQ, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(NE);
+  __ AddConstant(R0, R1, 1, NE, kCcKeep);             // 16-bit ADD, encoding T1.
+  __ it(GE);
+  __ AddConstant(R0, R0, 1, GE, kCcSet);              // 32-bit ADDS, encoding T3.
+  __ it(LE);
+  __ AddConstant(R0, R0, 1, LE, kCcKeep);             // 16-bit ADD, encoding T2.
+
+  EmitAndCheck(&assembler, "AddConstant");
+}
+
 #undef __
 }  // namespace arm
 }  // namespace art
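
For reference, the encoding choices annotated in the AddConstant test follow from the Thumb2 immediate formats: the 16-bit encodings T1/T2 accept 3- and 8-bit immediates on low registers, the 32-bit encoding T3 accepts a "modified immediate" and is the only wide form that can also set flags, and the 32-bit encoding T4 (ADDW/SUBW) accepts any 12-bit immediate but can never set flags. That is why 257 assembles to a single ADDW under kCcKeep yet needs MVN+SUBS under kCcSet, and why values fitting none of the forms are synthesized via MVN, MOVW, or MOVW+MOVT plus a register-register ADD/SUB. The check below is a self-contained sketch of the T3 encodability rule, written for illustration rather than taken from the assembler:

    #include <cstdint>

    // A Thumb2 "modified immediate" (encoding T3) is an 8-bit value, one of the
    // byte-replication patterns 0x00XY00XY / 0xXY00XY00 / 0xXYXYXYXY, or an
    // 8-bit value with its top bit set, rotated right by 8 to 31 bits.
    bool IsThumb2ModifiedImmediate(uint32_t value) {
      if (value <= 0xffu) {
        return true;  // 0x000000XY.
      }
      uint32_t lo = value & 0xffu;
      uint32_t mid = (value >> 8) & 0xffu;
      if (value == (lo | (lo << 16)) ||                           // 0x00XY00XY.
          value == ((mid << 8) | (mid << 24)) ||                  // 0xXY00XY00.
          value == (lo | (lo << 8) | (lo << 16) | (lo << 24))) {  // 0xXYXYXYXY.
        return true;
      }
      for (int rot = 8; rot <= 31; ++rot) {
        uint32_t imm8 = (value << rot) | (value >> (32 - rot));   // Rotate left by rot.
        if (imm8 <= 0xffu && (imm8 & 0x80u) != 0) {
          return true;  // An 8-bit value 1bcdefgh rotated right by rot.
        }
      }
      return false;
    }

Under this rule 256, 0x1000, and 0x10001 are T3-encodable while 257, 0x1002, and 0x10003 are not, which matches the expected disassembly registered below.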
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 886295e..f07f8c7 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5052,6 +5052,324 @@
   nullptr
 };
 
+const char* AddConstantResults[] = {
+  "   0:	4608      	mov	r0, r1\n",
+  "   2:	1c48      	adds	r0, r1, #1\n",
+  "   4:	1dc8      	adds	r0, r1, #7\n",
+  "   6:	f101 0008 	add.w	r0, r1, #8\n",
+  "   a:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  "   e:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  "  12:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  "  16:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  "  1a:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  "  1e:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  22:	1a08      	subs	r0, r1, r0\n",
+  "  24:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  28:	1808      	adds	r0, r1, r0\n",
+  "  2a:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  2e:	1808      	adds	r0, r1, r0\n",
+  "  30:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  "  34:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  "  38:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  3c:	1a08      	subs	r0, r1, r0\n",
+  "  3e:	f240 0003 	movw	r0, #3\n",
+  "  42:	f2c0 0001 	movt	r0, #1\n",
+  "  46:	1808      	adds	r0, r1, r0\n",
+  "  48:	1e48      	subs	r0, r1, #1\n",
+  "  4a:	1fc8      	subs	r0, r1, #7\n",
+  "  4c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  "  50:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  "  54:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  "  58:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  "  5c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  "  60:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  "  64:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  "  68:	1808      	adds	r0, r1, r0\n",
+  "  6a:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  "  6e:	1a08      	subs	r0, r1, r0\n",
+  "  70:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  "  74:	1a08      	subs	r0, r1, r0\n",
+  "  76:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  "  7a:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  "  7e:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  "  82:	1808      	adds	r0, r1, r0\n",
+  "  84:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  "  88:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  "  8c:	1808      	adds	r0, r1, r0\n",
+  "  8e:	3101      	adds	r1, #1\n",
+  "  90:	3007      	adds	r0, #7\n",
+  "  92:	3108      	adds	r1, #8\n",
+  "  94:	30ff      	adds	r0, #255	; 0xff\n",
+  "  96:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  "  9a:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  "  9e:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  "  a2:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  "  a6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  aa:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  ae:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  b2:	4460      	add	r0, ip\n",
+  "  b4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  b8:	4461      	add	r1, ip\n",
+  "  ba:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  "  be:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  "  c2:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  "  c6:	eba0 000c 	sub.w	r0, r0, ip\n",
+  "  ca:	f240 0c03 	movw	ip, #3\n",
+  "  ce:	f2c0 0c01 	movt	ip, #1\n",
+  "  d2:	4461      	add	r1, ip\n",
+  "  d4:	3801      	subs	r0, #1\n",
+  "  d6:	3907      	subs	r1, #7\n",
+  "  d8:	3808      	subs	r0, #8\n",
+  "  da:	39ff      	subs	r1, #255	; 0xff\n",
+  "  dc:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  "  e0:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  "  e4:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  "  e8:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  "  ec:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  "  f0:	4460      	add	r0, ip\n",
+  "  f2:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  "  f6:	eba1 010c 	sub.w	r1, r1, ip\n",
+  "  fa:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  "  fe:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 102:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 106:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 10a:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 10e:	4461      	add	r1, ip\n",
+  " 110:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 114:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 118:	4460      	add	r0, ip\n",
+  " 11a:	f101 0801 	add.w	r8, r1, #1\n",
+  " 11e:	f108 0007 	add.w	r0, r8, #7\n",
+  " 122:	f108 0808 	add.w	r8, r8, #8\n",
+  " 126:	f101 08ff 	add.w	r8, r1, #255	; 0xff\n",
+  " 12a:	f508 7080 	add.w	r0, r8, #256	; 0x100\n",
+  " 12e:	f208 1801 	addw	r8, r8, #257	; 0x101\n",
+  " 132:	f601 78ff 	addw	r8, r1, #4095	; 0xfff\n",
+  " 136:	f508 5080 	add.w	r0, r8, #4096	; 0x1000\n",
+  " 13a:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 13e:	eba8 080c 	sub.w	r8, r8, ip\n",
+  " 142:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 146:	1808      	adds	r0, r1, r0\n",
+  " 148:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 14c:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 150:	f508 3880 	add.w	r8, r8, #65536	; 0x10000\n",
+  " 154:	f101 1801 	add.w	r8, r1, #65537	; 0x10001\n",
+  " 158:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 15c:	eba8 0000 	sub.w	r0, r8, r0\n",
+  " 160:	f240 0003 	movw	r0, #3\n",
+  " 164:	f2c0 0001 	movt	r0, #1\n",
+  " 168:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 16c:	f108 38ff 	add.w	r8, r8, #4294967295	; 0xffffffff\n",
+  " 170:	f1a1 0807 	sub.w	r8, r1, #7\n",
+  " 174:	f1a8 0008 	sub.w	r0, r8, #8\n",
+  " 178:	f1a8 08ff 	sub.w	r8, r8, #255	; 0xff\n",
+  " 17c:	f5a1 7880 	sub.w	r8, r1, #256	; 0x100\n",
+  " 180:	f2a8 1001 	subw	r0, r8, #257	; 0x101\n",
+  " 184:	f6a8 78ff 	subw	r8, r8, #4095	; 0xfff\n",
+  " 188:	f5a1 5880 	sub.w	r8, r1, #4096	; 0x1000\n",
+  " 18c:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 190:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 194:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 198:	1a08      	subs	r0, r1, r0\n",
+  " 19a:	f64f 78ff 	movw	r8, #65535	; 0xffff\n",
+  " 19e:	eba1 0808 	sub.w	r8, r1, r8\n",
+  " 1a2:	f5a8 3080 	sub.w	r0, r8, #65536	; 0x10000\n",
+  " 1a6:	f1a8 1801 	sub.w	r8, r8, #65537	; 0x10001\n",
+  " 1aa:	f06f 1801 	mvn.w	r8, #65537	; 0x10001\n",
+  " 1ae:	eb01 0808 	add.w	r8, r1, r8\n",
+  " 1b2:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 1b6:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 1ba:	eb08 0000 	add.w	r0, r8, r0\n",
+  " 1be:	4608      	mov	r0, r1\n",
+  " 1c0:	f101 0001 	add.w	r0, r1, #1\n",
+  " 1c4:	f101 0007 	add.w	r0, r1, #7\n",
+  " 1c8:	f101 0008 	add.w	r0, r1, #8\n",
+  " 1cc:	f101 00ff 	add.w	r0, r1, #255	; 0xff\n",
+  " 1d0:	f501 7080 	add.w	r0, r1, #256	; 0x100\n",
+  " 1d4:	f201 1001 	addw	r0, r1, #257	; 0x101\n",
+  " 1d8:	f601 70ff 	addw	r0, r1, #4095	; 0xfff\n",
+  " 1dc:	f501 5080 	add.w	r0, r1, #4096	; 0x1000\n",
+  " 1e0:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 1e4:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 1e8:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 1ec:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f0:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 1f4:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 1f8:	f501 3080 	add.w	r0, r1, #65536	; 0x10000\n",
+  " 1fc:	f101 1001 	add.w	r0, r1, #65537	; 0x10001\n",
+  " 200:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 204:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 208:	f240 0003 	movw	r0, #3\n",
+  " 20c:	f2c0 0001 	movt	r0, #1\n",
+  " 210:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 214:	f101 30ff 	add.w	r0, r1, #4294967295	; 0xffffffff\n",
+  " 218:	f1a1 0007 	sub.w	r0, r1, #7\n",
+  " 21c:	f1a1 0008 	sub.w	r0, r1, #8\n",
+  " 220:	f1a1 00ff 	sub.w	r0, r1, #255	; 0xff\n",
+  " 224:	f5a1 7080 	sub.w	r0, r1, #256	; 0x100\n",
+  " 228:	f2a1 1001 	subw	r0, r1, #257	; 0x101\n",
+  " 22c:	f6a1 70ff 	subw	r0, r1, #4095	; 0xfff\n",
+  " 230:	f5a1 5080 	sub.w	r0, r1, #4096	; 0x1000\n",
+  " 234:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 238:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 23c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 240:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 244:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 248:	eba1 0000 	sub.w	r0, r1, r0\n",
+  " 24c:	f5a1 3080 	sub.w	r0, r1, #65536	; 0x10000\n",
+  " 250:	f1a1 1001 	sub.w	r0, r1, #65537	; 0x10001\n",
+  " 254:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 258:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 25c:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 260:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 264:	eb01 0000 	add.w	r0, r1, r0\n",
+  " 268:	f101 0101 	add.w	r1, r1, #1\n",
+  " 26c:	f100 0007 	add.w	r0, r0, #7\n",
+  " 270:	f101 0108 	add.w	r1, r1, #8\n",
+  " 274:	f100 00ff 	add.w	r0, r0, #255	; 0xff\n",
+  " 278:	f501 7180 	add.w	r1, r1, #256	; 0x100\n",
+  " 27c:	f200 1001 	addw	r0, r0, #257	; 0x101\n",
+  " 280:	f601 71ff 	addw	r1, r1, #4095	; 0xfff\n",
+  " 284:	f500 5080 	add.w	r0, r0, #4096	; 0x1000\n",
+  " 288:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 28c:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 290:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 294:	4460      	add	r0, ip\n",
+  " 296:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 29a:	4461      	add	r1, ip\n",
+  " 29c:	f500 3080 	add.w	r0, r0, #65536	; 0x10000\n",
+  " 2a0:	f101 1101 	add.w	r1, r1, #65537	; 0x10001\n",
+  " 2a4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2a8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ac:	f240 0c03 	movw	ip, #3\n",
+  " 2b0:	f2c0 0c01 	movt	ip, #1\n",
+  " 2b4:	4461      	add	r1, ip\n",
+  " 2b6:	f100 30ff 	add.w	r0, r0, #4294967295	; 0xffffffff\n",
+  " 2ba:	f1a1 0107 	sub.w	r1, r1, #7\n",
+  " 2be:	f1a0 0008 	sub.w	r0, r0, #8\n",
+  " 2c2:	f1a1 01ff 	sub.w	r1, r1, #255	; 0xff\n",
+  " 2c6:	f5a0 7080 	sub.w	r0, r0, #256	; 0x100\n",
+  " 2ca:	f2a1 1101 	subw	r1, r1, #257	; 0x101\n",
+  " 2ce:	f6a0 70ff 	subw	r0, r0, #4095	; 0xfff\n",
+  " 2d2:	f5a1 5180 	sub.w	r1, r1, #4096	; 0x1000\n",
+  " 2d6:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 2da:	4460      	add	r0, ip\n",
+  " 2dc:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 2e0:	eba1 010c 	sub.w	r1, r1, ip\n",
+  " 2e4:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 2e8:	eba0 000c 	sub.w	r0, r0, ip\n",
+  " 2ec:	f5a1 3180 	sub.w	r1, r1, #65536	; 0x10000\n",
+  " 2f0:	f1a0 1001 	sub.w	r0, r0, #65537	; 0x10001\n",
+  " 2f4:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 2f8:	4461      	add	r1, ip\n",
+  " 2fa:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 2fe:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 302:	4460      	add	r0, ip\n",
+  " 304:	1c08      	adds	r0, r1, #0\n",
+  " 306:	1c48      	adds	r0, r1, #1\n",
+  " 308:	1dc8      	adds	r0, r1, #7\n",
+  " 30a:	f111 0008 	adds.w	r0, r1, #8\n",
+  " 30e:	f111 00ff 	adds.w	r0, r1, #255	; 0xff\n",
+  " 312:	f511 7080 	adds.w	r0, r1, #256	; 0x100\n",
+  " 316:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 31a:	1a08      	subs	r0, r1, r0\n",
+  " 31c:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 320:	1808      	adds	r0, r1, r0\n",
+  " 322:	f511 5080 	adds.w	r0, r1, #4096	; 0x1000\n",
+  " 326:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 32a:	1a08      	subs	r0, r1, r0\n",
+  " 32c:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 330:	1808      	adds	r0, r1, r0\n",
+  " 332:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 336:	1808      	adds	r0, r1, r0\n",
+  " 338:	f511 3080 	adds.w	r0, r1, #65536	; 0x10000\n",
+  " 33c:	f111 1001 	adds.w	r0, r1, #65537	; 0x10001\n",
+  " 340:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 344:	1a08      	subs	r0, r1, r0\n",
+  " 346:	f240 0003 	movw	r0, #3\n",
+  " 34a:	f2c0 0001 	movt	r0, #1\n",
+  " 34e:	1808      	adds	r0, r1, r0\n",
+  " 350:	1e48      	subs	r0, r1, #1\n",
+  " 352:	1fc8      	subs	r0, r1, #7\n",
+  " 354:	f1b1 0008 	subs.w	r0, r1, #8\n",
+  " 358:	f1b1 00ff 	subs.w	r0, r1, #255	; 0xff\n",
+  " 35c:	f5b1 7080 	subs.w	r0, r1, #256	; 0x100\n",
+  " 360:	f46f 7080 	mvn.w	r0, #256	; 0x100\n",
+  " 364:	1808      	adds	r0, r1, r0\n",
+  " 366:	f640 70ff 	movw	r0, #4095	; 0xfff\n",
+  " 36a:	1a08      	subs	r0, r1, r0\n",
+  " 36c:	f5b1 5080 	subs.w	r0, r1, #4096	; 0x1000\n",
+  " 370:	f46f 5080 	mvn.w	r0, #4096	; 0x1000\n",
+  " 374:	1808      	adds	r0, r1, r0\n",
+  " 376:	f241 0002 	movw	r0, #4098	; 0x1002\n",
+  " 37a:	1a08      	subs	r0, r1, r0\n",
+  " 37c:	f64f 70ff 	movw	r0, #65535	; 0xffff\n",
+  " 380:	1a08      	subs	r0, r1, r0\n",
+  " 382:	f5b1 3080 	subs.w	r0, r1, #65536	; 0x10000\n",
+  " 386:	f1b1 1001 	subs.w	r0, r1, #65537	; 0x10001\n",
+  " 38a:	f06f 1001 	mvn.w	r0, #65537	; 0x10001\n",
+  " 38e:	1808      	adds	r0, r1, r0\n",
+  " 390:	f64f 70fd 	movw	r0, #65533	; 0xfffd\n",
+  " 394:	f6cf 70fe 	movt	r0, #65534	; 0xfffe\n",
+  " 398:	1808      	adds	r0, r1, r0\n",
+  " 39a:	3000      	adds	r0, #0\n",
+  " 39c:	3101      	adds	r1, #1\n",
+  " 39e:	3007      	adds	r0, #7\n",
+  " 3a0:	3108      	adds	r1, #8\n",
+  " 3a2:	30ff      	adds	r0, #255	; 0xff\n",
+  " 3a4:	f511 7180 	adds.w	r1, r1, #256	; 0x100\n",
+  " 3a8:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 3ac:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3b0:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 3b4:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3b8:	f510 5080 	adds.w	r0, r0, #4096	; 0x1000\n",
+  " 3bc:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 3c0:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 3c4:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 3c8:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 3cc:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 3d0:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3d4:	f510 3080 	adds.w	r0, r0, #65536	; 0x10000\n",
+  " 3d8:	f111 1101 	adds.w	r1, r1, #65537	; 0x10001\n",
+  " 3dc:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 3e0:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 3e4:	f240 0c03 	movw	ip, #3\n",
+  " 3e8:	f2c0 0c01 	movt	ip, #1\n",
+  " 3ec:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 3f0:	3801      	subs	r0, #1\n",
+  " 3f2:	3907      	subs	r1, #7\n",
+  " 3f4:	3808      	subs	r0, #8\n",
+  " 3f6:	39ff      	subs	r1, #255	; 0xff\n",
+  " 3f8:	f5b0 7080 	subs.w	r0, r0, #256	; 0x100\n",
+  " 3fc:	f46f 7c80 	mvn.w	ip, #256	; 0x100\n",
+  " 400:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 404:	f640 7cff 	movw	ip, #4095	; 0xfff\n",
+  " 408:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 40c:	f5b1 5180 	subs.w	r1, r1, #4096	; 0x1000\n",
+  " 410:	f46f 5c80 	mvn.w	ip, #4096	; 0x1000\n",
+  " 414:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 418:	f241 0c02 	movw	ip, #4098	; 0x1002\n",
+  " 41c:	ebb1 010c 	subs.w	r1, r1, ip\n",
+  " 420:	f64f 7cff 	movw	ip, #65535	; 0xffff\n",
+  " 424:	ebb0 000c 	subs.w	r0, r0, ip\n",
+  " 428:	f5b1 3180 	subs.w	r1, r1, #65536	; 0x10000\n",
+  " 42c:	f1b0 1001 	subs.w	r0, r0, #65537	; 0x10001\n",
+  " 430:	f06f 1c01 	mvn.w	ip, #65537	; 0x10001\n",
+  " 434:	eb11 010c 	adds.w	r1, r1, ip\n",
+  " 438:	f64f 7cfd 	movw	ip, #65533	; 0xfffd\n",
+  " 43c:	f6cf 7cfe 	movt	ip, #65534	; 0xfffe\n",
+  " 440:	eb10 000c 	adds.w	r0, r0, ip\n",
+  " 444:	bf08      	it	eq\n",
+  " 446:	f111 0001 	addseq.w	r0, r1, #1\n",
+  " 44a:	bf18      	it	ne\n",
+  " 44c:	1c48      	addne	r0, r1, #1\n",
+  " 44e:	bfa8      	it	ge\n",
+  " 450:	f110 0001 	addsge.w	r0, r0, #1\n",
+  " 454:	bfd8      	it	le\n",
+  " 456:	3001      	addle	r0, #1\n",
+  nullptr
+};
+
 std::map<std::string, const char* const*> test_results;
 void setup_results() {
     test_results["SimpleMov"] = SimpleMovResults;
@@ -5102,4 +5420,5 @@
     test_results["LoadStoreLiteral"] = LoadStoreLiteralResults;
     test_results["LoadStoreLimits"] = LoadStoreLimitsResults;
     test_results["CompareAndBranch"] = CompareAndBranchResults;
+    test_results["AddConstant"] = AddConstantResults;
 }
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 107d5bb..cfd8421 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -616,6 +616,14 @@
   EmitI21(0x3E, rs, imm21);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0x9, ft, imm16);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {
+  EmitFI(0x11, 0xD, ft, imm16);
+}
+
 void Mips64Assembler::EmitBcondc(BranchCondition cond,
                                  GpuRegister rs,
                                  GpuRegister rt,
@@ -669,6 +677,14 @@
     case kCondGEU:
       Bgeuc(rs, rt, imm16_21);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FpuRegister>(rs), imm16_21);
+      break;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
       UNREACHABLE();
@@ -827,6 +843,86 @@
   EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
 }
 
+void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);
+}
+
+void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);
+}
+
+void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);
+}
+
+void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);
+}
+
+void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);
+}
+
+void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);
+}
+
+void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);
+}
+
+void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);
+}
+
+void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);
+}
+
+void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);
+}
+
+void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);
+}
+
 void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
 }
@@ -1134,6 +1230,10 @@
       CHECK_NE(lhs_reg, ZERO);
       CHECK_EQ(rhs_reg, ZERO);
       break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
     case kUncond:
       UNREACHABLE();
   }
@@ -1188,6 +1288,10 @@
       return kCondGEU;
     case kCondGEU:
       return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
   }
@@ -1567,7 +1671,7 @@
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       EmitBcondc(condition, lhs, rhs, offset);
-      Nop();  // TODO: improve by filling the forbidden slot.
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kCall:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1657,6 +1761,14 @@
   Bcond(label, kCondNEZ, rs);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
+  Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
+}
+
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
   if (!IsInt<16>(offset)) {
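
Unlike pre-R6 MIPS, where FP compares set condition-code flags tested by bc1t/bc1f, the R6 cmp.cond.fmt instructions added above write an all-ones or all-zeros mask into the destination FPU register, and bc1eqz/bc1nez branch on bit 0 of that register being clear or set. A minimal sketch of how the new pieces compose, in the style of the assembler tests further below (register choices are illustrative):

    // Branch to 'label' when f2 < f4 (single precision): cmp.lt.s leaves ~0 in
    // f0 when the compare is true and 0 otherwise; bc1nez then tests that mask.
    mips64::Mips64Label label;
    __ CmpLtS(mips64::F0, mips64::F2, mips64::F4);
    __ Bc1nez(mips64::F0, &label);
    // ... instructions skipped when f2 < f4 ...
    __ Bind(&label);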
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 57fc19a..883f013 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -227,6 +227,8 @@
   void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
   void Beqzc(GpuRegister rs, uint32_t imm21);
   void Bnezc(GpuRegister rs, uint32_t imm21);
+  void Bc1eqz(FpuRegister ft, uint16_t imm16);
+  void Bc1nez(FpuRegister ft, uint16_t imm16);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -266,6 +268,26 @@
   void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
 
   void Cvtsw(FpuRegister fd, FpuRegister fs);
   void Cvtdw(FpuRegister fd, FpuRegister fs);
@@ -317,6 +339,8 @@
   void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
   void Beqzc(GpuRegister rs, Mips64Label* label);
   void Bnezc(GpuRegister rs, Mips64Label* label);
+  void Bc1eqz(FpuRegister ft, Mips64Label* label);
+  void Bc1nez(FpuRegister ft, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -474,6 +498,8 @@
     kCondNEZ,
     kCondLTU,
     kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
     kUncond,
   };
   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 29a5a88..bac4375 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -403,6 +403,106 @@
   DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
 }
 
+TEST_F(AssemblerMIPS64Test, CmpUnS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeS) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUnD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeD) {
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
 TEST_F(AssemblerMIPS64Test, CvtDL) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
 }
@@ -591,6 +691,58 @@
   BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
 }
 
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {
+  mips64::Mips64Label label;
+  __ Bc1eqz(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1eqz(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1eqz $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1eqz $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {
+  mips64::Mips64Label label;
+  __ Bc1nez(mips64::F0, &label);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bc1nez(mips64::F31, &label);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1nez $f0, 1f\n"
+      "nop\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "bc1nez $f31, 1b\n"
+      "nop\n";
+  DriverStr(expected, "Bc1nez");
+}
+
 TEST_F(AssemblerMIPS64Test, LongBeqc) {
   mips64::Mips64Label label;
   __ Beqc(mips64::A0, mips64::A1, &label);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6fae8e4..21ce73c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -315,10 +315,13 @@
   UsageError("      stripped using standard command line tools such as strip or objcopy.");
   UsageError("      (enabled by default in debug builds, disabled by default otherwise)");
   UsageError("");
-  UsageError("  --debuggable: Produce debuggable code. Implies --generate-debug-info.");
-  UsageError("");
   UsageError("  --no-generate-debug-info: Do not generate debug information for native debugging.");
   UsageError("");
+  UsageError("  --debuggable: Produce code debuggable with Java debugger. Implies -g.");
+  UsageError("");
+  UsageError("  --native-debuggable: Produce code debuggable with native debugger (like LLDB).");
+  UsageError("      Implies --debuggable.");
+  UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
   UsageError("      such as initial heap size, maximum heap size, and verbose output.");
   UsageError("      Use a separate --runtime-arg switch for each argument.");
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 5833129..bad928e 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -2380,7 +2380,7 @@
 static int DumpImage(Runtime* runtime, const char* image_location, OatDumperOptions* options,
                      std::ostream* os) {
   // Dumping the image, no explicit class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   ScopedObjectAccess soa(Thread::Current());
@@ -2439,7 +2439,7 @@
 static int DumpOatWithoutRuntime(OatFile* oat_file, OatDumperOptions* options, std::ostream* os) {
   CHECK(oat_file != nullptr && options != nullptr);
   // No image = no class loader.
-  NullHandle<mirror::ClassLoader> null_class_loader;
+  ScopedNullHandle<mirror::ClassLoader> null_class_loader;
   options->class_loader_ = &null_class_loader;
 
   OatDumper oat_dumper(*oat_file, *options);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index ee8e690..a854650 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -102,6 +102,7 @@
   jdwp/jdwp_socket.cc \
   jdwp/object_registry.cc \
   jni_env_ext.cc \
+  jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index d6ba304..1680bbd 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -30,6 +30,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code and we don't need the
+  // heap prepared; it would actually be a problem, given that SetUpRuntimeOptions sets the
+  // instruction set to x86_64.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* const runtime = Runtime::Current();
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index 28d1942..51f992b 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -42,15 +42,15 @@
   // Look for variants that have divide support.
   static const char* arm_variants_with_div[] = {
           "cortex-a7", "cortex-a12", "cortex-a15", "cortex-a17", "cortex-a53", "cortex-a57",
-          "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
-          "cyclone", "denver", "krait", "swift"};
+          "cortex-a53.a57", "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
+          "cyclone", "denver", "krait", "swift" };
 
   bool has_div = FindVariantInArray(arm_variants_with_div, arraysize(arm_variants_with_div),
                                     variant);
 
   // Look for variants that have LPAE support.
   static const char* arm_variants_with_lpae[] = {
-      "cortex-a7", "cortex-a15", "krait", "denver"
+      "cortex-a7", "cortex-a15", "krait", "denver", "cortex-a53", "cortex-a57", "cortex-a53.a57"
   };
   bool has_lpae = FindVariantInArray(arm_variants_with_lpae, arraysize(arm_variants_with_lpae),
                                      variant);
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ef39999..42f5df4 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,6 +45,11 @@
     "silvermont",
 };
 
+static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
+    "atom",
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -60,6 +65,10 @@
   bool has_AVX = false;
   bool has_AVX2 = false;
 
+  bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
+                                               arraysize(x86_variants_prefer_locked_add_sync),
+                                               variant);
+
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -68,10 +77,10 @@
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -83,11 +92,13 @@
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
+  bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                                has_AVX, has_AVX2, prefers_locked_add);
   } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                             has_AVX, has_AVX2, prefers_locked_add);
   }
 }
 
@@ -124,11 +135,15 @@
   const bool has_AVX2 = true;
 #endif
 
+  // No #define for memory synchronization preference.
+  const bool prefers_locked_add = false;
+
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -141,6 +156,8 @@
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
+  // No cpuinfo for memory synchronization preference.
+  const bool prefers_locked_add = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -177,10 +194,11 @@
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
@@ -204,7 +222,8 @@
       (has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
-      (has_AVX2_ == other_as_x86->has_AVX2_);
+      (has_AVX2_ == other_as_x86->has_AVX2_) &&
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -213,7 +232,8 @@
       (has_SSE4_1_ ? kSse4_1Bitfield : 0) |
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
-      (has_AVX2_ ? kAvx2Bitfield : 0);
+      (has_AVX2_ ? kAvx2Bitfield : 0) |
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
 }
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -248,6 +268,11 @@
   } else {
     result += ",-avx2";
   }
+  if (prefers_locked_add_) {
+    result += ",lock_add";
+  } else {
+    result += ",-lock_add";
+  }
   return result;
 }
 
@@ -259,6 +284,7 @@
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
+  bool prefers_locked_add = prefers_locked_add_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -281,6 +307,10 @@
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
+    } else if (feature == "lock_add") {
+      prefers_locked_add = true;
+    } else if (feature == "-lock_add") {
+      prefers_locked_add = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -288,10 +318,10 @@
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                                has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   }
 }
 
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 7b61245..2b845f8 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,6 +60,8 @@
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
+  bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -73,9 +75,10 @@
                                  bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2)
+                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
       : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add) {
   }
 
  private:
@@ -87,6 +90,7 @@
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
+    kPrefersLockedAdd = 64,
   };
 
   const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
@@ -94,6 +98,7 @@
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
+  const bool prefers_locked_add_;  // x86 prefers locked add for memory synchronization.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 25a406b..e8d01e6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,8 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+               x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
 
@@ -39,8 +40,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 3U);
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -48,7 +50,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -58,9 +60,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -75,8 +77,9 @@
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 15U);
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -84,7 +87,7 @@
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -94,9 +97,9 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 15U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
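
The updated bitmap expectations are the sums of the per-feature bit values from instruction_set_features_x86.h (sse4.2 = 8, avx = 16, avx2 = 32, lock_add = 64 are visible in this change; smp = 1, ssse3 = 2, and sse4.1 = 4 are assumed from the enum entries above the excerpt):

    1 (smp) + 2 (ssse3) + 64 (lock_add)                           = 67
    1 (smp) + 2 (ssse3) + 4 (sse4.1) + 8 (sse4.2) + 64 (lock_add) = 79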
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 3280177..b8000d0 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,8 +74,9 @@
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2)
-      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                  has_AVX2, prefers_locked_add) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 5171080..4562c64 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 238d9f3..effa1c5 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -146,10 +146,9 @@
       mirror::IfTable* iftable = GetDeclaringClass()->GetIfTable();
       for (size_t i = 0; i < iftable->Count() && result == nullptr; i++) {
         mirror::Class* interface = iftable->GetInterface(i);
-        for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-          ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
-            result = interface_method;
+        for (ArtMethod& interface_method : interface->GetVirtualMethods(pointer_size)) {
+          if (HasSameNameAndSignature(interface_method.GetInterfaceMethodIfProxy(pointer_size))) {
+            result = &interface_method;
             break;
           }
         }
@@ -157,8 +156,8 @@
     }
   }
   DCHECK(result == nullptr ||
-         GetInterfaceMethodIfProxy(sizeof(void*))->HasSameNameAndSignature(
-             result->GetInterfaceMethodIfProxy(sizeof(void*))));
+         GetInterfaceMethodIfProxy(pointer_size)->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(pointer_size)));
   return result;
 }
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index c86614c..2bc6c79 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -163,13 +163,13 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (72 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (104 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (116 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
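
All three mirror::Class offsets shift down by the same eight bytes, presumably reflecting one 64-bit field removed from mirror::Class elsewhere in this change; the ADD_TEST_EQ lines keep the constants checked against the real C++ offsets at build time:

    72 - 8 = 64 (access flags), 104 - 8 = 96 (object size), 116 - 8 = 108 (status)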
 
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
new file mode 100644
index 0000000..19ad302
--- /dev/null
+++ b/runtime/base/array_slice.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_ARRAY_SLICE_H_
+#define ART_RUNTIME_BASE_ARRAY_SLICE_H_
+
+#include "length_prefixed_array.h"
+#include "stride_iterator.h"
+#include "base/bit_utils.h"
+#include "base/casts.h"
+#include "base/iteration_range.h"
+
+namespace art {
+
+// An ArraySlice is an abstraction over an array or a part of an array of a particular type. It
+// does bounds checking and can be made from several common array-like structures in ART.
+template<typename T>
+class ArraySlice {
+ public:
+  // Create an empty array slice.
+  ArraySlice() : array_(nullptr), size_(0), element_size_(0) {}
+
+  // Create an array slice of the first 'length' elements of the array, with each element being
+  // element_size bytes long.
+  ArraySlice(T* array,
+             size_t length,
+             size_t element_size = sizeof(T))
+      : array_(array),
+        size_(dchecked_integral_cast<uint32_t>(length)),
+        element_size_(element_size) {
+    DCHECK(array_ != nullptr || length == 0);
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long. Both start_offset and end_offset are in
+  // element_size units.
+  ArraySlice(T* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array_ != nullptr || size_ == 0);
+    DCHECK_LE(start_offset, end_offset);
+    if (size_ != 0) {
+      uintptr_t offset = start_offset * element_size_;
+      array_ = *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array) + offset);
+    }
+  }
+
+  // Create an array slice of the elements between start_offset and end_offset of the array with
+  // each element being element_size bytes long and having the given alignment. Both start_offset
+  // and end_offset are in element_size units.
+  ArraySlice(LengthPrefixedArray<T>* array,
+             uint32_t start_offset,
+             uint32_t end_offset,
+             size_t element_size = sizeof(T),
+             size_t alignment = alignof(T))
+      : array_(nullptr),
+        size_(end_offset - start_offset),
+        element_size_(element_size) {
+    DCHECK(array != nullptr || size_ == 0);
+    if (size_ != 0) {
+      DCHECK_LE(start_offset, end_offset);
+      DCHECK_LE(start_offset, array->size());
+      DCHECK_LE(end_offset, array->size());
+      array_ = &array->At(start_offset, element_size_, alignment);
+    }
+  }
+
+  T& At(size_t index) {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  const T& At(size_t index) const {
+    DCHECK_LT(index, size_);
+    return AtUnchecked(index);
+  }
+
+  T& operator[](size_t index) {
+    return At(index);
+  }
+
+  const T& operator[](size_t index) const {
+    return At(index);
+  }
+
+  StrideIterator<T> begin() {
+    return StrideIterator<T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<const T> begin() const {
+    return StrideIterator<const T>(&AtUnchecked(0), element_size_);
+  }
+
+  StrideIterator<T> end() {
+    return StrideIterator<T>(&AtUnchecked(size_), element_size_);
+  }
+
+  StrideIterator<const T> end() const {
+    return StrideIterator<const T>(&AtUnchecked(size_), element_size_);
+  }
+
+  IterationRange<StrideIterator<T>> AsRange() {
+    return size() != 0 ? MakeIterationRange(begin(), end())
+                       : MakeEmptyIterationRange(StrideIterator<T>(nullptr, 0));
+  }
+
+  size_t size() const {
+    return size_;
+  }
+
+  size_t ElementSize() const {
+    return element_size_;
+  }
+
+ private:
+  T& AtUnchecked(size_t index) {
+    return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  const T& AtUnchecked(size_t index) const {
+    return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
+  }
+
+  T* array_;
+  size_t size_;
+  size_t element_size_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_ARRAY_SLICE_H_
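
The element_size parameter exists because ART stores objects such as ArtMethod whose real size depends on the image pointer size, so a slice cannot assume sizeof(T) as its stride. A minimal standalone sketch of the same striding idea (illustrative names, not the ART types above):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Elements laid out back to back with a runtime stride that may be
    // larger than sizeof(T).
    template <typename T>
    class Slice {
     public:
      Slice(T* array, size_t length, size_t element_size = sizeof(T))
          : array_(array), size_(length), element_size_(element_size) {
        assert(array != nullptr || length == 0);
      }

      T& At(size_t index) {
        assert(index < size_);
        return *reinterpret_cast<T*>(
            reinterpret_cast<uintptr_t>(array_) + index * element_size_);
      }

      size_t size() const { return size_; }

     private:
      T* array_;
      size_t size_;
      size_t element_size_;
    };

    int main() {
      // Each element carries 4 bytes of trailing data, the way ArtMethod's
      // size varies with the pointer size.
      constexpr size_t kStride = sizeof(int32_t) + 4;
      alignas(int32_t) unsigned char storage[3 * kStride] = {};
      Slice<int32_t> slice(reinterpret_cast<int32_t*>(storage), 3, kStride);
      for (size_t i = 0; i < slice.size(); ++i) {
        slice.At(i) = static_cast<int32_t>(i);
      }
      assert(slice.At(2) == 2);
      return 0;
    }
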
diff --git a/runtime/length_prefixed_array.h b/runtime/base/length_prefixed_array.h
similarity index 95%
rename from runtime/length_prefixed_array.h
rename to runtime/base/length_prefixed_array.h
index e01b6cc..d632871 100644
--- a/runtime/length_prefixed_array.h
+++ b/runtime/base/length_prefixed_array.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
-#define ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
+#ifndef ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
+#define ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
 
 #include <stddef.h>  // for offsetof()
 
@@ -110,4 +110,4 @@
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_LENGTH_PREFIXED_ARRAY_H_
+#endif  // ART_RUNTIME_BASE_LENGTH_PREFIXED_ARRAY_H_
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index d4c9057..263f50d 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -63,6 +63,9 @@
   kLambdaTableLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
+  kRosAllocGlobalLock,
+  kRosAllocBracketLock,
+  kRosAllocBulkFreeLock,
   kTransactionLogLock,
   kMarkSweepMarkStackLock,
   kJniWeakGlobalsLock,
@@ -73,9 +76,6 @@
   kReferenceQueueClearedReferencesLock,
   kReferenceProcessorLock,
   kJitCodeCacheLock,
-  kRosAllocGlobalLock,
-  kRosAllocBracketLock,
-  kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
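
Moving the kRosAlloc* enumerators earlier lowers their lock levels. Under the hierarchy rule documented in runtime/base/mutex.h (acquiring a lock at a level greater than or equal to one already held is a lock-order violation), the RosAlloc locks can now be taken while holding later, higher-level locks such as kJitCodeCacheLock. A standalone sketch of that rule under those assumptions, with stand-in names:

    #include <cassert>
    #include <vector>

    enum LockLevel {
      kRosAllocBulkFreeLock,  // Moved earlier: now below the JIT lock.
      kTransactionLogLock,
      kJitCodeCacheLock,
    };

    class CheckedThread {
     public:
      void Acquire(LockLevel level) {
        for (LockLevel held : held_) {
          // Violation if a held lock is at the same or a lower level than
          // the one being acquired.
          assert(level < held);
        }
        held_.push_back(level);
      }

     private:
      std::vector<LockLevel> held_;
    };

    int main() {
      CheckedThread t;
      t.Acquire(kJitCodeCacheLock);      // Higher-level lock first.
      t.Acquire(kRosAllocBulkFreeLock);  // Legal only after the reordering.
      return 0;
    }
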
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index a5d10b2..ea1afa8 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -30,7 +30,7 @@
 namespace art {
 
 inline mirror::Class* ClassLinker::FindSystemClass(Thread* self, const char* descriptor) {
-  return FindClass(self, descriptor, NullHandle<mirror::ClassLoader>());
+  return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
 }
 
 inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class** element_class) {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index f5085ed..d998d99 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -807,10 +807,7 @@
     auto* runtime = Runtime::Current();
     auto* image_space = runtime->GetHeap()->GetBootImageSpace();
     auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : klass->GetDirectMethods(pointer_size)) {
-      SanityCheckArtMethod(&m, klass, image_space);
-    }
-    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+    for (auto& m : klass->GetMethods(pointer_size)) {
       SanityCheckArtMethod(&m, klass, image_space);
     }
     auto* vtable = klass->GetVTable();
@@ -1442,8 +1439,12 @@
       if (klass != nullptr) {
         *result = EnsureResolved(self, descriptor, klass);
       } else {
-        *result = DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(),
-                              *pair.first, *pair.second);
+        *result = DefineClass(self,
+                              descriptor,
+                              hash,
+                              ScopedNullHandle<mirror::ClassLoader>(),
+                              *pair.first,
+                              *pair.second);
       }
       if (*result == nullptr) {
         CHECK(self->IsExceptionPending()) << descriptor;
@@ -1568,7 +1569,11 @@
     // The boot class loader, search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
-      return DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(), *pair.first,
+      return DefineClass(self,
+                         descriptor,
+                         hash,
+                         ScopedNullHandle<mirror::ClassLoader>(),
+                         *pair.first,
                          *pair.second);
     } else {
       // The boot class loader is searched ahead of the application class loader, failures are
@@ -1877,12 +1882,10 @@
     // We're invoking a virtual method directly (thanks to sharpening), compute the oat_method_index
     // by search for its position in the declared virtual methods.
     oat_method_index = declaring_class->NumDirectMethods();
-    size_t end = declaring_class->NumVirtualMethods();
     bool found_virtual = false;
-    for (size_t i = 0; i < end; i++) {
+    for (ArtMethod& art_method : declaring_class->GetVirtualMethods(image_pointer_size_)) {
       // Check method index instead of identity in case of duplicate method definitions.
-      if (method->GetDexMethodIndex() ==
-          declaring_class->GetVirtualMethod(i, image_pointer_size_)->GetDexMethodIndex()) {
+      if (method->GetDexMethodIndex() == art_method.GetDexMethodIndex()) {
         found_virtual = true;
         break;
       }
@@ -2245,11 +2248,14 @@
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
-    klass->SetDirectMethodsPtr(AllocArtMethodArray(self, allocator, it.NumDirectMethods()));
-    klass->SetVirtualMethodsPtr(AllocArtMethodArray(self, allocator, it.NumVirtualMethods()));
+    klass->SetMethodsPtr(
+        AllocArtMethodArray(self, allocator, it.NumDirectMethods() + it.NumVirtualMethods()),
+        it.NumDirectMethods(),
+        it.NumVirtualMethods());
     size_t class_def_method_index = 0;
     uint32_t last_dex_method_index = DexFile::kDexNoIndex;
     size_t last_class_def_method_index = 0;
+    // TODO These should really use the iterators.
     for (size_t i = 0; it.HasNextDirectMethod(); i++, it.Next()) {
       ArtMethod* method = klass->GetDirectMethodUnchecked(i, image_pointer_size_);
       LoadMethod(self, dex_file, it, klass, method);
@@ -2728,9 +2734,12 @@
   return nullptr;
 }
 
-void ClassLinker::UpdateClassVirtualMethods(mirror::Class* klass,
-                                            LengthPrefixedArray<ArtMethod>* new_methods) {
-  klass->SetVirtualMethodsPtr(new_methods);
+// TODO This should really be in mirror::Class.
+void ClassLinker::UpdateClassMethods(mirror::Class* klass,
+                                     LengthPrefixedArray<ArtMethod>* new_methods) {
+  klass->SetMethodsPtrUnchecked(new_methods,
+                                klass->NumDirectMethods(),
+                                klass->NumDeclaredVirtualMethods());
   // Need to mark the card so that the remembered sets and mod union tables get updated.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass);
 }
@@ -3210,11 +3219,8 @@
 
 void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
                                                     Handle<mirror::Class> klass) {
-  for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetDirectMethod(i, image_pointer_size_));
-  }
-  for (size_t i = 0; i < klass->NumVirtualMethods(); i++) {
-    ResolveMethodExceptionHandlerTypes(dex_file, klass->GetVirtualMethod(i, image_pointer_size_));
+  for (ArtMethod& method : klass->GetMethods(image_pointer_size_)) {
+    ResolveMethodExceptionHandlerTypes(dex_file, &method);
   }
 }
 
@@ -3302,29 +3308,30 @@
   throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
-  LengthPrefixedArray<ArtMethod>* directs = AllocArtMethodArray(self, allocator, 1);
-  // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
-  // want to throw OOM in the future.
-  if (UNLIKELY(directs == nullptr)) {
-    self->AssertPendingOOMException();
-    return nullptr;
-  }
-  klass->SetDirectMethodsPtr(directs);
-  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+  const size_t num_direct_methods = 1;
 
-  // Create virtual method using specified prototypes.
+  // They have as many virtual methods as there are entries in the given methods array.
   auto h_methods = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Method>*>(methods));
   DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
       << PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
-  auto* virtuals = AllocArtMethodArray(self, allocator, num_virtual_methods);
+
+  // Create the methods array.
+  LengthPrefixedArray<ArtMethod>* proxy_class_methods = AllocArtMethodArray(
+        self, allocator, num_direct_methods + num_virtual_methods);
   // Currently AllocArtMethodArray cannot return null, but the OOM logic is left there in case we
   // want to throw OOM in the future.
-  if (UNLIKELY(virtuals == nullptr)) {
+  if (UNLIKELY(proxy_class_methods == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetVirtualMethodsPtr(virtuals);
+  klass->SetMethodsPtr(proxy_class_methods, num_direct_methods, num_virtual_methods);
+
+  // Create the single direct method.
+  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+
+  // Create virtual method using specified prototypes.
+  // TODO These should really use the iterators.
   for (size_t i = 0; i < num_virtual_methods; ++i) {
     auto* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
     auto* prototype = h_methods->Get(i)->GetArtMethod();
@@ -4102,14 +4109,8 @@
   }
 
   DCHECK_EQ(temp_class->NumDirectMethods(), 0u);
-  for (auto& method : new_class->GetDirectMethods(image_pointer_size_)) {
-    if (method.GetDeclaringClass() == temp_class) {
-      method.SetDeclaringClass(new_class);
-    }
-  }
-
   DCHECK_EQ(temp_class->NumVirtualMethods(), 0u);
-  for (auto& method : new_class->GetVirtualMethods(image_pointer_size_)) {
+  for (auto& method : new_class->GetMethods(image_pointer_size_)) {
     if (method.GetDeclaringClass() == temp_class) {
       method.SetDeclaringClass(new_class);
     }
@@ -4193,8 +4194,7 @@
     // ArtMethod array pointers. If this occurs, it causes bugs in remembered sets since the GC
     // may not see any references to the target space and clean the card for a class if another
     // class had the same array pointer.
-    klass->SetDirectMethodsPtrUnchecked(nullptr);
-    klass->SetVirtualMethodsPtr(nullptr);
+    klass->SetMethodsPtrUnchecked(nullptr, 0, 0);
     klass->SetSFieldsPtrUnchecked(nullptr);
     klass->SetIFieldsPtrUnchecked(nullptr);
     if (UNLIKELY(h_new_class.Get() == nullptr)) {
@@ -4959,12 +4959,10 @@
   for (size_t k = ifstart + 1; k < iftable_count; k++) {
     // Skip ifstart since our current interface obviously cannot override itself.
     current_iface.Assign(iftable->GetInterface(k));
-    size_t num_instance_methods = current_iface->NumVirtualMethods();
-    // Iterate through every method on this interface. The order does not matter so we go forwards.
-    for (size_t m = 0; m < num_instance_methods; m++) {
-      ArtMethod* current_method = current_iface->GetVirtualMethodUnchecked(m, image_pointer_size);
+    // Iterate through every method on this interface. The order does not matter.
+    for (ArtMethod& current_method : current_iface->GetDeclaredVirtualMethods(image_pointer_size)) {
       if (UNLIKELY(target.HasSameNameAndSignature(
-                      current_method->GetInterfaceMethodIfProxy(image_pointer_size)))) {
+                      current_method.GetInterfaceMethodIfProxy(image_pointer_size)))) {
         // Check if the i'th interface is a subtype of this one.
         if (iface->IsAssignableFrom(current_iface.Get())) {
           return true;
@@ -5017,10 +5015,9 @@
     DCHECK_LT(k, iftable->Count());
 
     iface.Assign(iftable->GetInterface(k));
-    size_t num_instance_methods = iface->NumVirtualMethods();
-    // Iterate through every method on this interface. The order does not matter so we go forwards.
-    for (size_t m = 0; m < num_instance_methods; m++) {
-      ArtMethod* current_method = iface->GetVirtualMethodUnchecked(m, image_pointer_size_);
+    // Iterate through every declared method on this interface. The order does not matter.
+    for (auto& method_iter : iface->GetDeclaredVirtualMethods(image_pointer_size_)) {
+      ArtMethod* current_method = &method_iter;
       // Skip abstract methods and methods with different names.
       if (current_method->IsAbstract() ||
           !target_name_comparator.HasSameNameAndSignature(
@@ -5327,6 +5324,26 @@
   return nullptr;
 }
 
+static void SanityCheckVTable(Handle<mirror::Class> klass, uint32_t pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::PointerArray* check_vtable = klass->GetVTableDuringLinking();
+  mirror::Class* superclass = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
+  int32_t super_vtable_length = (superclass != nullptr) ? superclass->GetVTableLength() : 0;
+  for (int32_t i = 0; i < check_vtable->GetLength(); ++i) {
+    ArtMethod* m = check_vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
+    CHECK(m != nullptr);
+
+    ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
+    auto is_same_method = [m] (const ArtMethod& meth) {
+      return &meth == m;
+    };
+    CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
+          std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
+        << "While linking class '" << PrettyClass(klass.Get()) << "' unable to find owning class "
+        << "of '" << PrettyMethod(m) << "' (vtable index: " << i << ").";
+  }
+}
+
 bool ClassLinker::LinkInterfaceMethods(
     Thread* self,
     Handle<mirror::Class> klass,
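
SanityCheckVTable strengthens the old null-only check into an ownership invariant: every vtable slot must either inherit the superclass entry at the same index or point at a method the class itself declares. A standalone sketch of that invariant with stand-in types:

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    using Method = int;  // Stand-in for ArtMethod.

    bool VTableIsSane(const std::vector<const Method*>& vtable,
                      const std::vector<const Method*>& super_vtable,
                      const std::vector<Method>& declared_virtuals) {
      for (size_t i = 0; i < vtable.size(); ++i) {
        const Method* m = vtable[i];
        if (m == nullptr) {
          return false;
        }
        bool inherited = i < super_vtable.size() && super_vtable[i] == m;
        bool declared = std::any_of(
            declared_virtuals.begin(), declared_virtuals.end(),
            [m](const Method& d) { return &d == m; });
        if (!inherited && !declared) {
          return false;  // Slot owned by neither superclass nor this class.
        }
      }
      return true;
    }

    int main() {
      std::vector<Method> super_methods{1};
      std::vector<Method> declared{2};
      std::vector<const Method*> super_vtable{&super_methods[0]};
      // Slot 0 inherited, slot 1 declared locally: sane.
      std::vector<const Method*> vtable{&super_methods[0], &declared[0]};
      assert(VTableIsSane(vtable, super_vtable, declared));
      return 0;
    }
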
@@ -5449,25 +5466,31 @@
       const bool super_interface = is_super && extend_super_iftable;
       auto method_array(hs2.NewHandle(iftable->GetMethodArray(i)));
 
-      LengthPrefixedArray<ArtMethod>* input_virtual_methods = nullptr;
-      Handle<mirror::PointerArray> input_vtable_array = NullHandle<mirror::PointerArray>();
+      ArraySlice<ArtMethod> input_virtual_methods;
+      ScopedNullHandle<mirror::PointerArray> null_handle;
+      Handle<mirror::PointerArray> input_vtable_array(null_handle);
       int32_t input_array_length = 0;
+
       // TODO Cleanup Needed: In the presence of default methods this optimization is rather dirty
       //      and confusing. Default methods should always look through all the superclasses
       //      because they are the last choice of an implementation. We get around this by looking
       //      at the super-classes iftable methods (copied into method_array previously) when we are
       //      looking for the implementation of a super-interface method but that is rather dirty.
+      bool using_virtuals;
       if (super_interface) {
-        // We are overwriting a super class interface, try to only virtual methods instead of the
+        // If we are overwriting a super class interface, try to search only the virtual methods instead of the
         // whole vtable.
-        input_virtual_methods = klass->GetVirtualMethodsPtr();
-        input_array_length = klass->NumVirtualMethods();
+        using_virtuals = true;
+        input_virtual_methods = klass->GetDeclaredMethodsSlice(image_pointer_size_);
+        input_array_length = input_virtual_methods.size();
       } else {
-        // A new interface, we need the whole vtable in case a new interface method is implemented
-        // in the whole superclass.
+        // For a new interface, however, we need the whole vtable in case a new
+        // interface method is implemented anywhere in the superclass hierarchy.
+        using_virtuals = false;
         input_vtable_array = vtable;
         input_array_length = input_vtable_array->GetLength();
       }
+
       // For each method in interface
       for (size_t j = 0; j < num_methods; ++j) {
         auto* interface_method = iftable->GetInterface(i)->GetVirtualMethod(j, image_pointer_size_);
@@ -5488,8 +5511,8 @@
         bool found_impl = false;
         ArtMethod* vtable_impl = nullptr;
         for (int32_t k = input_array_length - 1; k >= 0; --k) {
-          ArtMethod* vtable_method = input_virtual_methods != nullptr ?
-              &input_virtual_methods->At(k, method_size, method_alignment) :
+          ArtMethod* vtable_method = using_virtuals ?
+              &input_virtual_methods[k] :
               input_vtable_array->GetElementPtrSize<ArtMethod*>(k, image_pointer_size_);
           ArtMethod* vtable_method_for_name_comparison =
               vtable_method->GetInterfaceMethodIfProxy(image_pointer_size_);
@@ -5650,38 +5673,39 @@
     VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
                        << " default_methods=" << default_methods.size()
                        << " default_conflict_methods=" << default_conflict_methods.size();
-    const size_t old_method_count = klass->NumVirtualMethods();
+    const size_t old_method_count = klass->NumMethods();
     const size_t new_method_count = old_method_count +
                                     miranda_methods.size() +
                                     default_methods.size() +
                                     default_conflict_methods.size();
     // Attempt to realloc to save RAM if possible.
-    LengthPrefixedArray<ArtMethod>* old_virtuals = klass->GetVirtualMethodsPtr();
-    // The Realloced virtual methods aren't visiblef from the class roots, so there is no issue
+    LengthPrefixedArray<ArtMethod>* old_methods = klass->GetMethodsPtr();
+    // The Realloced virtual methods aren't visible from the class roots, so there is no issue
     // where GCs could attempt to mark stale pointers due to memcpy. And since we overwrite the
     // realloced memory with out->CopyFrom, we are guaranteed to have objects in the to space since
     // CopyFrom has internal read barriers.
-    const size_t old_size = old_virtuals != nullptr
-        ? LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
-                                                      method_size,
-                                                      method_alignment)
-        : 0u;
+    //
+    // TODO We should maybe move some of this into mirror::Class or at least into another method.
+    const size_t old_size = LengthPrefixedArray<ArtMethod>::ComputeSize(old_method_count,
+                                                                        method_size,
+                                                                        method_alignment);
     const size_t new_size = LengthPrefixedArray<ArtMethod>::ComputeSize(new_method_count,
                                                                         method_size,
                                                                         method_alignment);
-    auto* virtuals = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-        runtime->GetLinearAlloc()->Realloc(self, old_virtuals, old_size, new_size));
-    if (UNLIKELY(virtuals == nullptr)) {
+    const size_t old_methods_ptr_size = (old_methods != nullptr) ? old_size : 0;
+    auto* methods = reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
+        runtime->GetLinearAlloc()->Realloc(self, old_methods, old_methods_ptr_size, new_size));
+    if (UNLIKELY(methods == nullptr)) {
       self->AssertPendingOOMException();
       self->EndAssertNoThreadSuspension(old_cause);
       return false;
     }
     ScopedArenaUnorderedMap<ArtMethod*, ArtMethod*> move_table(allocator.Adapter());
-    if (virtuals != old_virtuals) {
+    if (methods != old_methods) {
       // Maps from heap allocated miranda method to linear alloc miranda method.
-      StrideIterator<ArtMethod> out = virtuals->begin(method_size, method_alignment);
-      // Copy over the old methods + miranda methods.
-      for (auto& m : klass->GetVirtualMethods(image_pointer_size_)) {
+      StrideIterator<ArtMethod> out = methods->begin(method_size, method_alignment);
+      // Copy over the old methods.
+      for (auto& m : klass->GetMethods(image_pointer_size_)) {
         move_table.emplace(&m, &*out);
         // The CopyFrom is only necessary to not miss read barriers since Realloc won't do read
         // barriers when it copies.
@@ -5689,8 +5713,7 @@
         ++out;
       }
     }
-    StrideIterator<ArtMethod> out(virtuals->begin(method_size, method_alignment)
-                                      + old_method_count);
+    StrideIterator<ArtMethod> out(methods->begin(method_size, method_alignment) + old_method_count);
     // Copy over miranda methods before copying vtable since CopyOf may cause thread suspension and
     // we want the roots of the miranda methods to get visited.
     for (ArtMethod* mir_method : miranda_methods) {
@@ -5702,9 +5725,8 @@
       move_table.emplace(mir_method, &new_method);
       ++out;
     }
-    // We need to copy the default methods into our own virtual method table since the runtime
-    // requires that every method on a class's vtable be in that respective class's virtual method
-    // table.
+    // We need to copy the default methods into our own method table since the runtime requires that
+    // every method on a class's vtable be in that respective class's virtual method table.
     // NOTE This means that two classes might have the same implementation of a method from the same
     // interface but will have different ArtMethod*s for them. This also means we cannot compare a
     // default method found on a class with one found on the declaring interface directly and must
@@ -5738,8 +5760,8 @@
       move_table.emplace(conf_method, &new_method);
       ++out;
     }
-    virtuals->SetSize(new_method_count);
-    UpdateClassVirtualMethods(klass.Get(), virtuals);
+    methods->SetSize(new_method_count);
+    UpdateClassMethods(klass.Get(), methods);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
     // suspension assert.
     self->EndAssertNoThreadSuspension(old_cause);
@@ -5755,7 +5777,7 @@
       self->AssertPendingOOMException();
       return false;
     }
-    out = virtuals->begin(method_size, method_alignment) + old_method_count;
+    out = methods->begin(method_size, method_alignment) + old_method_count;
     size_t vtable_pos = old_vtable_count;
     for (size_t i = old_method_count; i < new_method_count; ++i) {
       // Leave the declaring class alone as type indices are relative to it
@@ -5809,8 +5831,16 @@
       }
     }
 
+    if (kIsDebugBuild) {
+      for (size_t i = 0; i < new_vtable_count; ++i) {
+        CHECK(move_table.find(vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_)) ==
+              move_table.end());
+      }
+    }
+
     klass->SetVTable(vtable.Get());
-    // Go fix up all the stale miranda pointers.
+    // Go fix up all the stale (old miranda or default method) pointers.
+    // First do it on the iftable.
     for (size_t i = 0; i < ifcount; ++i) {
       for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
         auto* method_array = iftable->GetMethodArray(i);
@@ -5824,7 +5854,7 @@
         }
       }
     }
-    // Fix up IMT in case it has any miranda methods in it.
+    // Fix up the IMT next.
     for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
       auto it = move_table.find(out_imt[i]);
       if (it != move_table.end()) {
@@ -5836,25 +5866,26 @@
       auto* resolved_methods = klass->GetDexCache()->GetResolvedMethods();
       for (size_t i = 0, count = klass->GetDexCache()->NumResolvedMethods(); i < count; ++i) {
         auto* m = mirror::DexCache::GetElementPtrSize(resolved_methods, i, image_pointer_size_);
-        // We don't remove default methods from the move table since we need them to update the
-        // vtable. Therefore just skip them for this check.
-        if (!m->IsDefault()) {
-          CHECK(move_table.find(m) == move_table.end()) << PrettyMethod(m);
-        }
+        CHECK(move_table.find(m) == move_table.end() ||
+              // The original versions of copied methods will still be present so allow those too.
+              // Note that if the first check passes this might fail to GetDeclaringClass().
+              std::find_if(m->GetDeclaringClass()->GetMethods(image_pointer_size_).begin(),
+                           m->GetDeclaringClass()->GetMethods(image_pointer_size_).end(),
+                           [m] (ArtMethod& meth) {
+                             return &meth == m;
+                           }) != m->GetDeclaringClass()->GetMethods(image_pointer_size_).end())
+            << "Obsolete methods " << PrettyMethod(m) << " is in dex cache!";
       }
     }
-    // Put some random garbage in old virtuals to help find stale pointers.
-    if (virtuals != old_virtuals) {
-      memset(old_virtuals, 0xFEu, old_size);
+    // Put some random garbage in old methods to help find stale pointers.
+    if (methods != old_methods && old_methods != nullptr) {
+      memset(old_methods, 0xFEu, old_size);
     }
   } else {
     self->EndAssertNoThreadSuspension(old_cause);
   }
   if (kIsDebugBuild) {
-    auto* check_vtable = klass->GetVTableDuringLinking();
-    for (int i = 0; i < check_vtable->GetLength(); ++i) {
-      CHECK(check_vtable->GetElementPtrSize<ArtMethod*>(i, image_pointer_size_) != nullptr);
-    }
+    SanityCheckVTable(klass, image_pointer_size_);
   }
   return true;
 }
@@ -5929,6 +5960,20 @@
 
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ version such as Class and Method.
+  //
+  // The overall sort order is:
+  // 1) All object reference fields, sorted alphabetically.
+  // 2) All java long (64-bit) integer fields, sorted alphabetically.
+  // 3) All java double (64-bit) floating point fields, sorted alphabetically.
+  // 4) All java int (32-bit) integer fields, sorted alphabetically.
+  // 5) All java float (32-bit) floating point fields, sorted alphabetically.
+  // 6) All java char (16-bit) integer fields, sorted alphabetically.
+  // 7) All java short (16-bit) integer fields, sorted alphabetically.
+  // 8) All java boolean (8-bit) integer fields, sorted alphabetically.
+  // 9) All java byte (8-bit) integer fields, sorted alphabetically.
+  //
+  // Once the fields are sorted in this order we will attempt to fill any gaps that might be present
+  // in the memory layout of the structure. See ShuffleForward for how this is done.
   std::deque<ArtField*> grouped_and_sorted_fields;
   const char* old_no_suspend_cause = self->StartAssertNoThreadSuspension(
       "Naked ArtField references in deque");
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 0d3bc1e..f16fe92 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -983,8 +983,8 @@
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void UpdateClassVirtualMethods(mirror::Class* klass,
-                                 LengthPrefixedArray<ArtMethod>* new_methods)
+  void UpdateClassMethods(mirror::Class* klass,
+                          LengthPrefixedArray<ArtMethod>* new_methods)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 2c086c5..59a43ee 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -254,10 +254,20 @@
       EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
     }
 
-    for (ArtMethod& method : klass->GetVirtualMethods(sizeof(void*))) {
+    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
       AssertMethod(&method);
       EXPECT_FALSE(method.IsDirect());
-      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()));
+      EXPECT_EQ(klass.Get(), method.GetDeclaringClass());
+    }
+
+    for (ArtMethod& method : klass->GetCopiedMethods(sizeof(void*))) {
+      AssertMethod(&method);
+      EXPECT_FALSE(method.IsDirect());
+      EXPECT_TRUE(method.IsMiranda() || method.IsDefault() || method.IsDefaultConflicting());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
+      EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
+          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
@@ -496,13 +506,14 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, class_size_), "classSize");
     addOffset(OFFSETOF_MEMBER(mirror::Class, clinit_thread_id_), "clinitThreadId");
     addOffset(OFFSETOF_MEMBER(mirror::Class, component_type_), "componentType");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, copied_methods_offset_), "copiedMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_), "dexCache");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, direct_methods_), "directMethods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, name_), "name");
     addOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_instance_fields_),
               "numReferenceInstanceFields");
@@ -516,7 +527,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
     addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_), "virtualMethods");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
@@ -844,7 +855,7 @@
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
   EXPECT_STREQ("value", c->GetIFieldsPtr()->At(0).GetName());
@@ -1090,7 +1101,7 @@
 
 TEST_F(ClassLinkerTest, ValidatePredefinedClassSizes) {
   ScopedObjectAccess soa(Thread::Current());
-  NullHandle<mirror::ClassLoader> class_loader;
+  ScopedNullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
 
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Class;", class_loader);
@@ -1122,10 +1133,7 @@
     SHARED_REQUIRES(Locks::mutator_lock_) {
   EXPECT_EQ((c->GetAccessFlags() & kAccPreverified) != 0U, preverified)
       << "Class " << PrettyClass(c) << " not as expected";
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    CheckMethod(&m, preverified);
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     CheckMethod(&m, preverified);
   }
 }
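
The copiedMethodsOffset and virtualMethodsOffset entries reflect this patch's central data-structure change: the separate direct_methods_ and virtual_methods_ arrays are merged into one methods_ array carved into three regions by two offsets. A standalone sketch of the bookkeeping (stand-in struct, not the real mirror::Class):

    #include <cassert>
    #include <cstddef>

    // methods_ layout:
    //   [0, virtual_methods_offset_)                       direct methods
    //   [virtual_methods_offset_, copied_methods_offset_)  declared virtuals
    //   [copied_methods_offset_, num_methods_)             copied methods
    //                                 (miranda / default / default-conflict)
    struct MethodRegions {
      size_t virtual_methods_offset_;
      size_t copied_methods_offset_;
      size_t num_methods_;

      size_t NumDirectMethods() const { return virtual_methods_offset_; }
      size_t NumDeclaredVirtualMethods() const {
        return copied_methods_offset_ - virtual_methods_offset_;
      }
      size_t NumCopiedMethods() const {
        return num_methods_ - copied_methods_offset_;
      }
    };

    int main() {
      // 2 direct + 3 declared virtual + 1 copied method.
      MethodRegions c{2, 5, 6};
      assert(c.NumDirectMethods() == 2);
      assert(c.NumDeclaredVirtualMethods() == 3);
      assert(c.NumCopiedMethods() == 1);
      return 0;
    }
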
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index f705a50..2640a6e 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -328,6 +328,19 @@
   class_linker_ = runtime_->GetClassLinker();
   class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
 
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we call
+  // Runtime::Start; give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+  // Get the boot class path from the runtime so it can be used in tests.
+  boot_class_path_ = class_linker_->GetBootClassPath();
+  ASSERT_FALSE(boot_class_path_.empty());
+  java_lang_dex_file_ = boot_class_path_[0];
+
+  FinalizeSetup();
+}
+
+void CommonRuntimeTest::FinalizeSetup() {
   // Initialize maps for unstarted runtime. This needs to be here, as running clinits needs this
   // set up.
   if (!unstarted_initialized_) {
@@ -335,14 +348,10 @@
     unstarted_initialized_ = true;
   }
 
-  class_linker_->RunRootClinits();
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  java_lang_dex_file_ = boot_class_path_[0];
-
-
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we
-  // Runtime::Start, give it away now and then switch to a more managable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker_->RunRootClinits();
+  }
 
   // We're back in native, take the opportunity to initialize well known classes.
   WellKnownClasses::Init(Thread::Current()->GetJniEnv());
@@ -353,11 +362,6 @@
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
   // Reduce timinig-dependent flakiness in OOME behavior (eg StubTest.AllocObject).
   runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U);
-
-  // Get the boot class path from the runtime so it can be used in tests.
-  boot_class_path_ = class_linker_->GetBootClassPath();
-  ASSERT_FALSE(boot_class_path_.empty());
-  java_lang_dex_file_ = boot_class_path_[0];
 }
 
 void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 6da2bef..f318457 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -114,6 +114,10 @@
   // Called after the runtime is created.
   virtual void PostRuntimeCreate() {}
 
+  // Called to finish up runtime creation and fill in test fields. By default this runs root
+  // initializers, initializes well-known classes, and creates the heap thread pool.
+  virtual void FinalizeSetup();
+
   // Gets the path of the specified dex file for host or target.
   static std::string GetDexFileName(const std::string& jar_prefix);
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e5d648b..c32331f 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1491,25 +1491,20 @@
     return error;
   }
 
-  size_t direct_method_count = c->NumDirectMethods();
-  size_t virtual_method_count = c->NumVirtualMethods();
-
-  expandBufAdd4BE(pReply, direct_method_count + virtual_method_count);
+  expandBufAdd4BE(pReply, c->NumMethods());
 
   auto* cl = Runtime::Current()->GetClassLinker();
   auto ptr_size = cl->GetImagePointerSize();
-  for (size_t i = 0; i < direct_method_count + virtual_method_count; ++i) {
-    ArtMethod* m = i < direct_method_count ?
-        c->GetDirectMethod(i, ptr_size) : c->GetVirtualMethod(i - direct_method_count, ptr_size);
-    expandBufAddMethodId(pReply, ToMethodId(m));
-    expandBufAddUtf8String(pReply, m->GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
+  for (ArtMethod& m : c->GetMethods(ptr_size)) {
+    expandBufAddMethodId(pReply, ToMethodId(&m));
+    expandBufAddUtf8String(pReply, m.GetInterfaceMethodIfProxy(sizeof(void*))->GetName());
     expandBufAddUtf8String(pReply,
-                           m->GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
+                           m.GetInterfaceMethodIfProxy(sizeof(void*))->GetSignature().ToString());
     if (with_generic) {
       const char* generic_signature = "";
       expandBufAddUtf8String(pReply, generic_signature);
     }
-    expandBufAdd4BE(pReply, MangleAccessFlags(m->GetAccessFlags()));
+    expandBufAdd4BE(pReply, MangleAccessFlags(m.GetAccessFlags()));
   }
   return JDWP::ERR_NONE;
 }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 880d3e0..bc8ba97 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -1383,8 +1383,11 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Object* obj = GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationType);
+  mirror::Object* obj = GetAnnotationValue(klass,
+                                           annotation_item,
+                                           "value",
+                                           ScopedNullHandle<mirror::Class>(),
+                                           kDexAnnotationType);
   if (obj == nullptr) {
     return nullptr;
   }
@@ -1410,8 +1413,11 @@
     return nullptr;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return nullptr;
   }
   if (annotation_value.type_ != kDexAnnotationMethod) {
@@ -1439,7 +1445,7 @@
     return nullptr;
   }
   return GetAnnotationValue(
-      klass, annotation_item, "value", NullHandle<mirror::Class>(), kDexAnnotationMethod);
+      klass, annotation_item, "value", ScopedNullHandle<mirror::Class>(), kDexAnnotationMethod);
 }
 
 bool DexFile::GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) const {
@@ -1457,8 +1463,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllObjects)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllObjects)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationNull &&
@@ -1484,8 +1493,11 @@
     return false;
   }
   AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue(
-      klass, &annotation, &annotation_value, NullHandle<mirror::Class>(), kAllRaw)) {
+  if (!ProcessAnnotationValue(klass,
+                              &annotation,
+                              &annotation_value,
+                              ScopedNullHandle<mirror::Class>(),
+                              kAllRaw)) {
     return false;
   }
   if (annotation_value.type_ != kDexAnnotationInt) {
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 2819670..57d623e 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -27,89 +27,12 @@
 #include "base/unix_file/fd_file.h"
 #include "elf_file_impl.h"
 #include "elf_utils.h"
+#include "jit/debugger_interface.h"
 #include "leb128.h"
 #include "utils.h"
 
 namespace art {
 
-// -------------------------------------------------------------------
-// Binary GDB JIT Interface as described in
-//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
-extern "C" {
-  typedef enum {
-    JIT_NOACTION = 0,
-    JIT_REGISTER_FN,
-    JIT_UNREGISTER_FN
-  } JITAction;
-
-  struct JITCodeEntry {
-    JITCodeEntry* next_;
-    JITCodeEntry* prev_;
-    const uint8_t *symfile_addr_;
-    uint64_t symfile_size_;
-  };
-
-  struct JITDescriptor {
-    uint32_t version_;
-    uint32_t action_flag_;
-    JITCodeEntry* relevant_entry_;
-    JITCodeEntry* first_entry_;
-  };
-
-  // GDB will place breakpoint into this function.
-  // To prevent GCC from inlining or removing it we place noinline attribute
-  // and inline assembler statement inside.
-  void __attribute__((noinline)) __jit_debug_register_code();
-  void __attribute__((noinline)) __jit_debug_register_code() {
-    __asm__("");
-  }
-
-  // GDB will inspect contents of this descriptor.
-  // Static initialization is necessary to prevent GDB from seeing
-  // uninitialized descriptor.
-  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
-}
-
-
-static JITCodeEntry* CreateCodeEntry(const uint8_t *symfile_addr,
-                                     uintptr_t symfile_size) {
-  JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr;
-  entry->symfile_size_ = symfile_size;
-  entry->prev_ = nullptr;
-
-  // TODO: Do we need a lock here?
-  entry->next_ = __jit_debug_descriptor.first_entry_;
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry;
-  }
-  __jit_debug_descriptor.first_entry_ = entry;
-  __jit_debug_descriptor.relevant_entry_ = entry;
-
-  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
-  __jit_debug_register_code();
-  return entry;
-}
-
-
-static void UnregisterCodeEntry(JITCodeEntry* entry) {
-  // TODO: Do we need a lock here?
-  if (entry->prev_ != nullptr) {
-    entry->prev_->next_ = entry->next_;
-  } else {
-    __jit_debug_descriptor.first_entry_ = entry->next_;
-  }
-
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry->prev_;
-  }
-
-  __jit_debug_descriptor.relevant_entry_ = entry;
-  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  __jit_debug_register_code();
-  delete entry;
-}
-
 template <typename ElfTypes>
 ElfFileImpl<ElfTypes>::ElfFileImpl(File* file, bool writable,
                                    bool program_header_only,
@@ -352,7 +275,7 @@
   delete dynsym_symbol_table_;
   delete jit_elf_image_;
   if (jit_gdb_entry_) {
-    UnregisterCodeEntry(jit_gdb_entry_);
+    DeleteJITCodeEntry(jit_gdb_entry_);
   }
 }
 
@@ -1511,7 +1434,7 @@
     return;
   }
 
-  jit_gdb_entry_ = CreateCodeEntry(all.Begin(), all.Size());
+  jit_gdb_entry_ = CreateJITCodeEntry(all.Begin(), all.Size());
   gdb_file_mapping_.reset(all_ptr.release());
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 87e29ae..915d9ab 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -306,11 +306,13 @@
       mirror::Method* interface_method = soa.Decode<mirror::Method*>(interface_method_jobj);
       ArtMethod* proxy_method = rcvr->GetClass()->FindVirtualMethodForInterface(
           interface_method->GetArtMethod(), sizeof(void*));
-      auto* virtual_methods = proxy_class->GetVirtualMethodsPtr();
+      auto virtual_methods = proxy_class->GetVirtualMethodsSlice(sizeof(void*));
       size_t num_virtuals = proxy_class->NumVirtualMethods();
       size_t method_size = ArtMethod::Size(sizeof(void*));
+      // Rely on the fact that the methods are contiguous to determine the index of the method in
+      // the slice.
       int throws_index = (reinterpret_cast<uintptr_t>(proxy_method) -
-          reinterpret_cast<uintptr_t>(virtual_methods)) / method_size;
+          reinterpret_cast<uintptr_t>(&virtual_methods.At(0))) / method_size;
       CHECK_LT(throws_index, static_cast<int>(num_virtuals));
       mirror::ObjectArray<mirror::Class>* declared_exceptions =
           proxy_class->GetThrows()->Get(throws_index);
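
The rewritten throws_index computation works only because the virtual methods sit contiguously at a fixed stride, so a method's index is its byte distance from the slice base divided by the method size. A standalone sketch of the arithmetic (kMethodSize is an illustrative value, not a real ART constant):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Index of an element pointer within a contiguous fixed-stride region.
    inline int IndexOf(uintptr_t base, uintptr_t element, size_t element_size) {
      return static_cast<int>((element - base) / element_size);
    }

    int main() {
      constexpr size_t kMethodSize = 40;  // Hypothetical ArtMethod::Size().
      uintptr_t base = 0x1000;
      uintptr_t third = base + 2 * kMethodSize;
      assert(IndexOf(base, third, kMethodSize) == 2);
      return 0;
    }
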
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 4e85913..01e22a4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -31,6 +31,13 @@
     options->push_back(std::make_pair("imageinstructionset", "x86_64"));
   }
 
+  // Do not do any of the finalization. We don't want to run any code and we don't need the
+  // heap prepared; it would actually be a problem, since SetUpRuntimeOptions sets the
+  // instruction set to x86_64.
+  void FinalizeSetup() OVERRIDE {
+    ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
+  }
+
   static ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, Runtime::CalleeSaveType type)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* r = Runtime::Current();
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index e9497a2..99e98bb 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -367,37 +367,26 @@
                                    GetTimings());
       table->UpdateAndMarkReferences(this);
       DCHECK(GetHeap()->FindRememberedSetFromSpace(space) == nullptr);
-    } else if (collect_from_space_only_ && space->GetLiveBitmap() != nullptr) {
-      // If the space has no mod union table (the non-moving space and main spaces when the bump
-      // pointer space only collection is enabled,) then we need to scan its live bitmap or dirty
-      // cards as roots (including the objects on the live stack which have just marked in the live
-      // bitmap above in MarkAllocStackAsLive().)
-      DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
-          << "Space " << space->GetName() << " "
-          << "generational_=" << generational_ << " "
-          << "collect_from_space_only_=" << collect_from_space_only_;
+    } else if ((space->IsImageSpace() || collect_from_space_only_) &&
+               space->GetLiveBitmap() != nullptr) {
+      // If the space has no mod union table (the non-moving space, app image spaces, and main
+      // spaces when the bump pointer space only collection is enabled), then we need to scan its
+      // live bitmap or dirty cards as roots (including the objects on the live stack which have
+      // just been marked in the live bitmap above in MarkAllocStackAsLive()).
       accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
-      if (kUseRememberedSet) {
+      if (!space->IsImageSpace()) {
+        DCHECK(space == heap_->GetNonMovingSpace() || space == heap_->GetPrimaryFreeListSpace())
+            << "Space " << space->GetName() << " "
+            << "generational_=" << generational_ << " "
+            << "collect_from_space_only_=" << collect_from_space_only_;
         // App images currently do not have remembered sets.
-        DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) ||
-               rem_set != nullptr);
+        DCHECK_EQ(kUseRememberedSet, rem_set != nullptr);
       } else {
         DCHECK(rem_set == nullptr);
       }
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(from_space_, this);
-        if (kIsDebugBuild) {
-          // Verify that there are no from-space references that
-          // remain in the space, that is, the remembered set (and the
-          // card table) didn't miss any from-space references in the
-          // space.
-          accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-          SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
-          live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                        reinterpret_cast<uintptr_t>(space->End()),
-                                        visitor);
-        }
       } else {
         TimingLogger::ScopedTiming t2("VisitLiveBits", GetTimings());
         accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
@@ -406,6 +395,17 @@
                                       reinterpret_cast<uintptr_t>(space->End()),
                                       visitor);
       }
+      if (kIsDebugBuild) {
+        // Verify that there are no from-space references that
+        // remain in the space, that is, the remembered set (and the
+        // card table) didn't miss any from-space references in the
+        // space.
+        accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+        SemiSpaceVerifyNoFromSpaceReferencesObjectVisitor visitor(this);
+        live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                      reinterpret_cast<uintptr_t>(space->End()),
+                                      visitor);
+      }
     }
   }
 
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index ab921d9..dc23afe 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -35,7 +35,7 @@
   ASSERT_EQ(queue.GetLength(), 0U);
   auto ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
@@ -65,11 +65,11 @@
   queue.Dump(LOG(INFO));
   auto weak_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/WeakReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(weak_ref_class.Get() != nullptr);
   auto finalizer_ref_class = hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindClass(self, "Ljava/lang/ref/FinalizerReference;",
-                                                      NullHandle<mirror::ClassLoader>()));
+                                                      ScopedNullHandle<mirror::ClassLoader>()));
   ASSERT_TRUE(finalizer_ref_class.Get() != nullptr);
   auto ref1(hs.NewHandle(weak_ref_class->AllocObject(self)->AsReference()));
   ASSERT_TRUE(ref1.Get() != nullptr);
diff --git a/runtime/handle.h b/runtime/handle.h
index f939ec5..5b3bb60 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -64,7 +64,7 @@
 
   ALWAYS_INLINE jobject ToJObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
     if (UNLIKELY(reference_->AsMirrorPtr() == nullptr)) {
-      // Special case so that we work with NullHandles.
+      // Special case so that we work with null handles.
       return nullptr;
     }
     return reinterpret_cast<jobject>(reference_);
@@ -147,12 +147,12 @@
   template<size_t kNumReferences> friend class StackHandleScope;
 };
 
-// A special case of Handle that only holds references to null.
+// A special case of Handle that only holds references to null. Any Handle copied from it is
+// invalid once it goes out of scope. Example: Handle<T> h = ScopedNullHandle<T>() leaves h dangling.
 template<class T>
-class NullHandle : public Handle<T> {
+class ScopedNullHandle : public Handle<T> {
  public:
-  NullHandle() : Handle<T>(&null_ref_) {
-  }
+  ScopedNullHandle() : Handle<T>(&null_ref_) {}
 
  private:
   StackReference<mirror::Object> null_ref_;
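
The rename earns its "Scoped": the null StackReference lives inside the handle object itself, so a plain Handle copied from a temporary dangles as soon as the temporary dies. A standalone sketch of the pitfall with simplified stand-in types:

    #include <cassert>

    struct StackReference { void* ptr = nullptr; };

    struct Handle {
      StackReference* ref;
    };

    struct ScopedNullHandle : Handle {
      ScopedNullHandle() : Handle{&null_ref_} {}
      StackReference null_ref_;  // The referent is a member, not static.
    };

    // Fine: the temporary outlives the call expression.
    void Use(Handle h) { assert(h.ref->ptr == nullptr); }

    int main() {
      Use(ScopedNullHandle());        // OK.
      Handle h = ScopedNullHandle();  // h.ref now points into a dead temporary.
      (void)h;  // Dereferencing h.ref here would be undefined behavior.
      return 0;
    }
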
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 264cd2c..9f61449 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -93,11 +93,8 @@
     // We need the class to be resolved to install/uninstall stubs. Otherwise its methods
     // could not be initialized or linked with regards to class inheritance.
   } else {
-    for (size_t i = 0, e = klass->NumDirectMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetDirectMethod(i, sizeof(void*)));
-    }
-    for (size_t i = 0, e = klass->NumVirtualMethods(); i < e; i++) {
-      InstallStubsForMethod(klass->GetVirtualMethod(i, sizeof(void*)));
+    for (ArtMethod& method : klass->GetMethods(sizeof(void*))) {
+      InstallStubsForMethod(&method);
     }
   }
 }
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 92b6e4f..60ad0cb 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -128,8 +128,13 @@
   }
   StackHandleScope<1> hs(self);
   Handle<mirror::String> h_class_name(hs.NewHandle(class_name));
-  UnstartedRuntimeFindClass(self, h_class_name, NullHandle<mirror::ClassLoader>(), result,
-                            "Class.forName", true, false);
+  UnstartedRuntimeFindClass(self,
+                            h_class_name,
+                            ScopedNullHandle<mirror::ClassLoader>(),
+                            result,
+                            "Class.forName",
+                            true,
+                            false);
   CheckExceptionGenerateClassNotFound(self);
 }
 
@@ -704,7 +709,7 @@
   Handle<mirror::Class> h_class(hs.NewHandle(
       runtime->GetClassLinker()->FindClass(self,
                                            "Ljava/io/StringReader;",
-                                           NullHandle<mirror::ClassLoader>())));
+                                           ScopedNullHandle<mirror::ClassLoader>())));
   if (h_class.Get() == nullptr) {
     AbortTransactionOrFail(self, "Could not find StringReader class");
     return;
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 15f5122..5c44193 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -717,8 +717,8 @@
 }
 
 bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
-                                  jstring library_path, jstring permitted_path,
-                                  std::string* error_msg) {
+                                  bool is_shared_namespace, jstring library_path,
+                                  jstring permitted_path, std::string* error_msg) {
   error_msg->clear();
 
   // See if we've already loaded this library.  If we have, and the class loader
@@ -777,8 +777,9 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
-  void* handle = android::OpenNativeLibrary(env, runtime_->GetTargetSdkVersion(),
-                                            path_str, class_loader, library_path, permitted_path);
+  void* handle = android::OpenNativeLibrary(env, runtime_->GetTargetSdkVersion(), path_str,
+                                            class_loader, is_shared_namespace, library_path,
+                                            permitted_path);
   bool needs_native_bridge = false;
   if (handle == nullptr) {
     if (android::NativeBridgeIsSupported(path_str)) {
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 8559769..8cae1e5 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -85,8 +85,9 @@
    * Returns 'true' on success. On failure, sets 'error_msg' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject javaLoader,
-                         jstring library_path, jstring permitted_path, std::string* error_msg);
+  bool LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject class_loader,
+                         bool is_shared_namespace, jstring library_path, jstring permitted_path,
+                         std::string* error_msg);
 
   // Unload native libraries with cleared class loaders.
   void UnloadNativeLibraries()
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
new file mode 100644
index 0000000..3c2898b
--- /dev/null
+++ b/runtime/jit/debugger_interface.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "debugger_interface.h"
+
+namespace art {
+
+// -------------------------------------------------------------------
+// Binary GDB JIT Interface as described in
+//   http://sourceware.org/gdb/onlinedocs/gdb/Declarations.html
+// -------------------------------------------------------------------
+extern "C" {
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } JITAction;
+
+  struct JITCodeEntry {
+    JITCodeEntry* next_;
+    JITCodeEntry* prev_;
+    const uint8_t* symfile_addr_;
+    uint64_t symfile_size_;
+  };
+
+  struct JITDescriptor {
+    uint32_t version_;
+    uint32_t action_flag_;
+    JITCodeEntry* relevant_entry_;
+    JITCodeEntry* first_entry_;
+  };
+
+  // GDB will place a breakpoint in this function.
+  // To prevent the compiler from inlining or removing it, we mark it noinline
+  // and place an inline assembler statement inside.
+  void __attribute__((noinline)) __jit_debug_register_code();
+  void __attribute__((noinline)) __jit_debug_register_code() {
+    __asm__("");
+  }
+
+  // GDB will inspect the contents of this descriptor.
+  // Static initialization is necessary to prevent GDB from seeing an
+  // uninitialized descriptor.
+  JITDescriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };
+}
+
+JITCodeEntry* CreateJITCodeEntry(const uint8_t* symfile_addr, uintptr_t symfile_size) {
+  JITCodeEntry* entry = new JITCodeEntry;
+  entry->symfile_addr_ = symfile_addr;
+  entry->symfile_size_ = symfile_size;
+  entry->prev_ = nullptr;
+
+  // TODO: Do we need a lock here?
+  entry->next_ = __jit_debug_descriptor.first_entry_;
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry;
+  }
+  __jit_debug_descriptor.first_entry_ = entry;
+  __jit_debug_descriptor.relevant_entry_ = entry;
+
+  __jit_debug_descriptor.action_flag_ = JIT_REGISTER_FN;
+  __jit_debug_register_code();
+  return entry;
+}
+
+void DeleteJITCodeEntry(JITCodeEntry* entry) {
+  // TODO: Do we need a lock here?
+  if (entry->prev_ != nullptr) {
+    entry->prev_->next_ = entry->next_;
+  } else {
+    __jit_debug_descriptor.first_entry_ = entry->next_;
+  }
+
+  if (entry->next_ != nullptr) {
+    entry->next_->prev_ = entry->prev_;
+  }
+
+  __jit_debug_descriptor.relevant_entry_ = entry;
+  __jit_debug_descriptor.action_flag_ = JIT_UNREGISTER_FN;
+  __jit_debug_register_code();
+  delete entry;
+}
+
+}  // namespace art
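
For orientation, a minimal sketch of how a JIT client might drive the two helpers above. OnMethodCompiled, OnMethodDiscarded, and the ELF buffer are hypothetical names introduced for illustration, not part of this change:

    #include <stdint.h>

    #include "jit/debugger_interface.h"  // assumed include path

    namespace art {

    // Register an in-memory ELF symbol file for freshly JITed code. This links a
    // new JITCodeEntry into __jit_debug_descriptor and calls
    // __jit_debug_register_code(), where an attached debugger has a breakpoint.
    JITCodeEntry* OnMethodCompiled(const uint8_t* elf_symfile, uintptr_t elf_size) {
      return CreateJITCodeEntry(elf_symfile, elf_size);
    }

    // Unlink the entry and notify the debugger when the code is discarded.
    void OnMethodDiscarded(JITCodeEntry* entry) {
      DeleteJITCodeEntry(entry);
    }

    }  // namespace art
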
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
new file mode 100644
index 0000000..a784ef5
--- /dev/null
+++ b/runtime/jit/debugger_interface.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+#define ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
+
+#include <inttypes.h>
+
+namespace art {
+
+extern "C" {
+  struct JITCodeEntry;
+}
+
+// Notify the native debugger about new JITed code by passing it an in-memory ELF file.
+JITCodeEntry* CreateJITCodeEntry(const uint8_t* symfile_addr, uintptr_t symfile_size);
+
+// Notify the native debugger that the JITed code has been removed.
+void DeleteJITCodeEntry(JITCodeEntry* entry);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_DEBUGGER_INTERFACE_H_
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index f241308..ab70f4c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -53,10 +53,10 @@
 }
 
 void Jit::DumpInfo(std::ostream& os) {
-  os << "Code cache size=" << PrettySize(code_cache_->CodeCacheSize())
-     << " data cache size=" << PrettySize(code_cache_->DataCacheSize())
-     << " number of compiled code=" << code_cache_->NumberOfCompiledCode()
-     << "\n";
+  os << "JIT code cache size=" << PrettySize(code_cache_->CodeCacheSize()) << "\n"
+     << "JIT data cache size=" << PrettySize(code_cache_->DataCacheSize()) << "\n"
+     << "JIT current capacity=" << PrettySize(code_cache_->GetCurrentCapacity()) << "\n"
+     << "JIT number of compiled code=" << code_cache_->NumberOfCompiledCode() << "\n";
   cumulative_timings_.Dump(os);
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e600a97..0edce2f 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -75,6 +75,10 @@
   void StartProfileSaver(const std::string& filename, const std::vector<std::string>& code_paths);
   void StopProfileSaver();
 
+  void DumpForSigQuit(std::ostream& os) {
+    DumpInfo(os);
+  }
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 905b277..1c842e4 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -154,6 +154,11 @@
 
   uint64_t GetLastUpdateTimeNs() const;
 
+  size_t GetCurrentCapacity() REQUIRES(!lock_) {
+    MutexLock lock(Thread::Current(), lock_);
+    return current_capacity_;
+  }
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
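
GetCurrentCapacity above follows the usual annotated-lock accessor pattern: the REQUIRES(!lock_) annotation asserts that the caller does not already hold lock_, and the method takes the lock only for the duration of the read. A standalone sketch of the same shape using the standard library (hypothetical names; ART's own Mutex/MutexLock additionally carry the static thread-safety checking):

    #include <cstddef>
    #include <mutex>

    class Cache {
     public:
      // Caller must not already hold lock_ (ART states this as REQUIRES(!lock_)).
      size_t GetCurrentCapacity() {
        std::lock_guard<std::mutex> guard(lock_);  // hold lock_ only for the read
        return current_capacity_;
      }

     private:
      std::mutex lock_;
      size_t current_capacity_ = 0;  // only accessed with lock_ held
    };
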
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index dcb346c..3820592 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -71,12 +71,12 @@
       break;
     }
   }
-  DCHECK(cache != nullptr);
   return cache;
 }
 
 void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = GetInlineCache(dex_pc);
+  CHECK(cache != nullptr) << PrettyMethod(method_) << "@" << dex_pc;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 5e3fa19..cb67ee3 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -316,12 +316,7 @@
 static ArtMethod* FindMethod(mirror::Class* c, const StringPiece& name, const StringPiece& sig)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  for (auto& method : c->GetDirectMethods(pointer_size)) {
-    if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
-      return &method;
-    }
-  }
-  for (auto& method : c->GetVirtualMethods(pointer_size)) {
+  for (auto& method : c->GetMethods(pointer_size)) {
     if (kNative == method.IsNative() && name == method.GetName() && method.GetSignature() == sig) {
       return &method;
     }
@@ -2220,13 +2215,7 @@
 
     size_t unregistered_count = 0;
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-    for (auto& m : c->GetDirectMethods(pointer_size)) {
-      if (m.IsNative()) {
-        m.UnregisterNative();
-        unregistered_count++;
-      }
-    }
-    for (auto& m : c->GetVirtualMethods(pointer_size)) {
+    for (auto& m : c->GetMethods(pointer_size)) {
       if (m.IsNative()) {
         m.UnregisterNative();
         unregistered_count++;
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 9e416dc..ef4fe15 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -22,13 +22,14 @@
 #include "art_field-inl.h"
 #include "art_method.h"
 #include "art_method-inl.h"
+#include "base/array_slice.h"
+#include "base/length_prefixed_array.h"
 #include "class_loader.h"
 #include "common_throws.h"
 #include "dex_cache.h"
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "length_prefixed_array.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
@@ -62,61 +63,148 @@
   return GetFieldObject<DexCache, kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_));
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtr() {
+inline uint32_t Class::GetCopiedMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_));
+}
+
+inline uint32_t Class::GetDirectMethodsStartOffset() {
+  return 0;
+}
+
+inline uint32_t Class::GetVirtualMethodsStartOffset() {
+  return GetFieldShort(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSlice(size_t pointer_size) {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetDirectMethodsPtrUnchecked();
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDirectMethodsSliceUnchecked(pointer_size);
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetDirectMethodsPtrUnchecked() {
+inline ArraySlice<ArtMethod> Class::GetDirectMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetVirtualMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDeclaredMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetDeclaredMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetDirectMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size) {
+  return ArraySlice<ArtMethod>(GetMethodsPtr(),
+                               GetVirtualMethodsStartOffset(),
+                               GetCopiedMethodsStartOffset(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetVirtualMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetVirtualMethodsSliceUnchecked(size_t pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetVirtualMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  DCHECK(ValidPointerSize(pointer_size)) << pointer_size;
+  return GetCopiedMethodsSliceUnchecked(pointer_size);
+}
+
+inline ArraySlice<ArtMethod> Class::GetCopiedMethodsSliceUnchecked(size_t pointer_size) {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               GetCopiedMethodsStartOffset(),
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
+}
+
+inline LengthPrefixedArray<ArtMethod>* Class::GetMethodsPtr() {
   return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_)));
+      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, methods_)));
 }
 
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtrUnchecked() {
-  return reinterpret_cast<LengthPrefixedArray<ArtMethod>*>(
-      GetField64(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_)));
+template<VerifyObjectFlags kVerifyFlags>
+inline ArraySlice<ArtMethod> Class::GetMethodsSlice(size_t pointer_size) {
+  DCHECK(IsLoaded() || IsErroneous());
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return ArraySlice<ArtMethod>(methods,
+                               0,
+                               NumMethods(),
+                               ArtMethod::Size(pointer_size),
+                               ArtMethod::Alignment(pointer_size));
 }
 
-inline void Class::SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  DCHECK(GetDirectMethodsPtrUnchecked() == nullptr);
-  SetDirectMethodsPtrUnchecked(new_direct_methods);
-}
 
-inline void Class::SetDirectMethodsPtrUnchecked(
-    LengthPrefixedArray<ArtMethod>* new_direct_methods) {
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_),
-                    reinterpret_cast<uint64_t>(new_direct_methods));
+inline uint32_t Class::NumMethods() {
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  return (methods == nullptr) ? 0 : methods->size();
 }
 
 inline ArtMethod* Class::GetDirectMethodUnchecked(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetDirectMethodsSliceUnchecked(pointer_size).At(i);
 }
 
 inline ArtMethod* Class::GetDirectMethod(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  auto* methods = GetDirectMethodsPtr();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetDirectMethodsSlice(pointer_size).At(i);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
-inline LengthPrefixedArray<ArtMethod>* Class::GetVirtualMethodsPtr() {
-  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
-  return GetVirtualMethodsPtrUnchecked();
+inline void Class::SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                                 uint32_t num_direct,
+                                 uint32_t num_virtual) {
+  DCHECK(GetMethodsPtr() == nullptr);
+  SetMethodsPtrUnchecked(new_methods, num_direct, num_virtual);
 }
 
-inline void Class::SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods) {
-  // TODO: we reassign virtual methods to grow the table for miranda
-  // methods.. they should really just be assigned once.
-  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_),
-                    reinterpret_cast<uint64_t>(new_virtual_methods));
+inline void Class::SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                                          uint32_t num_direct,
+                                          uint32_t num_virtual) {
+  DCHECK_LE(num_direct + num_virtual, (new_methods == nullptr) ? 0 : new_methods->size());
+  SetMethodsPtrInternal(new_methods);
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, copied_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct + num_virtual));
+  SetFieldShort<false>(OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_offset_),
+                       dchecked_integral_cast<uint16_t>(num_direct));
+}
+
+inline void Class::SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods) {
+  SetField64<false>(OFFSET_OF_OBJECT_MEMBER(Class, methods_),
+                    reinterpret_cast<uint64_t>(new_methods));
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -135,11 +223,7 @@
 
 inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  LengthPrefixedArray<ArtMethod>* methods = GetVirtualMethodsPtrUnchecked();
-  DCHECK(methods != nullptr);
-  return &methods->At(i,
-                      ArtMethod::Size(pointer_size),
-                      ArtMethod::Alignment(pointer_size));
+  return &GetVirtualMethodsSliceUnchecked(pointer_size).At(i);
 }
 
 inline PointerArray* Class::GetVTable() {
@@ -833,24 +917,41 @@
       CHECK_EQ(field.GetDeclaringClass(), this) << GetStatus();
     }
   }
-  for (ArtMethod& method : GetDirectMethods(pointer_size)) {
-    method.VisitRoots(visitor, pointer_size);
-  }
-  for (ArtMethod& method : GetVirtualMethods(pointer_size)) {
+  for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots(visitor, pointer_size);
   }
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetDirectMethodsPtrUnchecked(),
-                                                   ArtMethod::Size(pointer_size),
-                                                   ArtMethod::Alignment(pointer_size));
+  return GetDirectMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredMethods(
+      size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetDeclaredMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetDeclaredVirtualMethods(
+      size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetDeclaredVirtualMethodsSliceUnchecked(pointer_size).AsRange();
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetVirtualMethods(size_t pointer_size) {
   CheckPointerSize(pointer_size);
-  return MakeIterationRangeFromLengthPrefixedArray(GetVirtualMethodsPtrUnchecked(),
+  return GetVirtualMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetCopiedMethods(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return GetCopiedMethodsSliceUnchecked(pointer_size).AsRange();
+}
+
+inline IterationRange<StrideIterator<ArtMethod>> Class::GetMethods(size_t pointer_size) {
+  CheckPointerSize(pointer_size);
+  return MakeIterationRangeFromLengthPrefixedArray(GetMethodsPtr(),
                                                    ArtMethod::Size(pointer_size),
                                                    ArtMethod::Alignment(pointer_size));
 }
@@ -918,13 +1020,15 @@
 }
 
 inline uint32_t Class::NumDirectMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetDirectMethodsPtrUnchecked();
-  return arr != nullptr ? arr->size() : 0u;
+  return GetVirtualMethodsStartOffset();
+}
+
+inline uint32_t Class::NumDeclaredVirtualMethods() {
+  return GetCopiedMethodsStartOffset() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumVirtualMethods() {
-  LengthPrefixedArray<ArtMethod>* arr = GetVirtualMethodsPtrUnchecked();
-  return arr != nullptr ? arr->size() : 0u;
+  return NumMethods() - GetVirtualMethodsStartOffset();
 }
 
 inline uint32_t Class::NumInstanceFields() {
@@ -952,16 +1056,11 @@
   if (ifields != new_ifields) {
     dest->SetIFieldsPtrUnchecked(new_ifields);
   }
-  // Update direct and virtual method arrays.
-  LengthPrefixedArray<ArtMethod>* direct_methods = GetDirectMethodsPtr();
-  LengthPrefixedArray<ArtMethod>* new_direct_methods = visitor(direct_methods);
-  if (direct_methods != new_direct_methods) {
-    dest->SetDirectMethodsPtrUnchecked(new_direct_methods);
-  }
-  LengthPrefixedArray<ArtMethod>* virtual_methods = GetVirtualMethodsPtr();
-  LengthPrefixedArray<ArtMethod>* new_virtual_methods = visitor(virtual_methods);
-  if (virtual_methods != new_virtual_methods) {
-    dest->SetVirtualMethodsPtr(new_virtual_methods);
+  // Update method array.
+  LengthPrefixedArray<ArtMethod>* methods = GetMethodsPtr();
+  LengthPrefixedArray<ArtMethod>* new_methods = visitor(methods);
+  if (methods != new_methods) {
+    dest->SetMethodsPtrInternal(new_methods);
   }
   // Update dex cache strings.
   GcRoot<mirror::String>* strings = GetDexCacheStrings();
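
All of the slice accessors above share one shape: a [start, end) window over the single length-prefixed methods_ array, indexed with a runtime byte stride because an ArtMethod's size depends on the pointer size. A simplified standalone sketch of that stride-slice idea (the real base/array_slice.h also takes an alignment and rounds the stride accordingly):

    #include <cstddef>
    #include <cstdint>

    // A [start, end) view over elements laid out with a runtime byte stride.
    template <typename T>
    class StrideSlice {
     public:
      StrideSlice(uint8_t* data, uint32_t start, uint32_t end, size_t stride)
          : data_(data), start_(start), end_(end), stride_(stride) {}

      // Element i of the slice, i.e. element start_ + i of the backing array.
      T& At(uint32_t i) {
        return *reinterpret_cast<T*>(data_ + (start_ + i) * stride_);
      }

      uint32_t size() const { return end_ - start_; }

     private:
      uint8_t* data_;
      uint32_t start_;
      uint32_t end_;
      size_t stride_;
    };
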
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 05a9039..66060f2 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -457,6 +457,10 @@
   return nullptr;
 }
 
+// TODO: These should maybe be renamed to FindOwnedVirtualMethod or something similar, because
+// they do not only find 'declared' methods but will also return copied methods. This behavior is
+// desired and correct, but the naming can lead to confusion because, in the Java language,
+// 'declared' excludes the interface methods that these may find.
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
                                             size_t pointer_size) {
   for (auto& method : GetVirtualMethods(pointer_size)) {
@@ -482,10 +486,8 @@
 ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
                                             size_t pointer_size) {
   if (GetDexCache() == dex_cache) {
-    for (auto& method : GetVirtualMethods(pointer_size)) {
-      // A miranda method may have a different DexCache and is always created by linking,
-      // never *declared* in the class.
-      if (method.GetDexMethodIndex() == dex_method_idx && !method.IsMiranda()) {
+    for (auto& method : GetDeclaredVirtualMethods(pointer_size)) {
+      if (method.GetDexMethodIndex() == dex_method_idx) {
         return &method;
       }
     }
@@ -725,12 +727,7 @@
 
 void Class::SetPreverifiedFlagOnAllMethods(size_t pointer_size) {
   DCHECK(IsVerified());
-  for (auto& m : GetDirectMethods(pointer_size)) {
-    if (!m.IsNative() && m.IsInvokable()) {
-      m.SetPreverified();
-    }
-  }
-  for (auto& m : GetVirtualMethods(pointer_size)) {
+  for (auto& m : GetMethods(pointer_size)) {
     if (!m.IsNative() && m.IsInvokable()) {
       m.SetPreverified();
     }
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 0ab5b97..ce879ba 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -44,6 +44,7 @@
 struct ClassOffsets;
 template<class T> class Handle;
 template<typename T> class LengthPrefixedArray;
+template<typename T> class ArraySlice;
 class Signature;
 class StringPiece;
 template<size_t kNumReferences> class PACKED(4) StackHandleScope;
@@ -702,12 +703,24 @@
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtr() SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetMethodsPtr()
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetDirectMethodsPtr(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetMethods(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void SetMethodsPtr(LengthPrefixedArray<ArtMethod>* new_methods,
+                     uint32_t num_direct,
+                     uint32_t num_virtual)
       SHARED_REQUIRES(Locks::mutator_lock_);
   // Used by image writer.
-  void SetDirectMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_direct_methods)
+  void SetMethodsPtrUnchecked(LengthPrefixedArray<ArtMethod>* new_methods,
+                              uint32_t num_direct,
+                              uint32_t num_virtual)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSlice(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetDirectMethod(size_t i, size_t pointer_size)
@@ -723,18 +736,50 @@
   ALWAYS_INLINE uint32_t NumDirectMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtr()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredMethods(
+        size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDeclaredVirtualMethods(
+        size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSlice(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetCopiedMethods(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSlice(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetVirtualMethods(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void SetVirtualMethodsPtr(LengthPrefixedArray<ArtMethod>* new_virtual_methods)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Returns the number of non-inherited virtual methods.
+  // Returns the number of non-inherited virtual methods (sum of declared and copied methods).
   ALWAYS_INLINE uint32_t NumVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns the number of copied virtual methods.
+  ALWAYS_INLINE uint32_t NumCopiedVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Returns the number of declared virtual methods.
+  ALWAYS_INLINE uint32_t NumDeclaredVirtualMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE uint32_t NumMethods() SHARED_REQUIRES(Locks::mutator_lock_);
+
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* GetVirtualMethod(size_t i, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1155,10 +1200,19 @@
     return pointer_size;
   }
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetDirectMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDirectMethodsSliceUnchecked(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtrUnchecked()
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetVirtualMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetDeclaredVirtualMethodsSliceUnchecked(size_t pointer_size)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Fix up all of the native pointers in the class by running them through the visitor. Only sets
@@ -1169,6 +1223,9 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
+  ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetVerifyError(Object* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool throw_on_failure, bool use_referrers_cache>
@@ -1194,6 +1251,15 @@
   IterationRange<StrideIterator<ArtField>> GetIFieldsUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // The index in the methods_ array where the first declared virtual method is.
+  ALWAYS_INLINE uint32_t GetVirtualMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first direct method is.
+  ALWAYS_INLINE uint32_t GetDirectMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // The index in the methods_ array where the first copied method is.
+  ALWAYS_INLINE uint32_t GetCopiedMethodsStartOffset() SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Check that the pointer size matches the one in the class linker.
@@ -1206,6 +1272,9 @@
   void VisitReferences(mirror::Class* klass, const Visitor& visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // 'Class' Object Fields
+  // Order governed by Java field ordering. See art::ClassLinker::LinkFields.
+
   // Defining class loader, or null for the "bootstrap" system loader.
   HeapReference<ClassLoader> class_loader_;
 
@@ -1254,9 +1323,6 @@
   // Short cuts to dex_cache_ member for fast compiled code access.
   uint64_t dex_cache_strings_;
 
-  // static, private, and <init> methods. Pointer to an ArtMethod length-prefixed array.
-  uint64_t direct_methods_;
-
   // instance fields
   //
   // These describe the layout of the contents of an Object.
@@ -1268,13 +1334,24 @@
   // ArtFields.
   uint64_t ifields_;
 
+  // Pointer to an ArtMethod length-prefixed array. Contains all the methods for which this class
+  // is the logical place of definition. This includes all private, static, final, and virtual
+  // methods as well as inherited default methods and miranda methods.
+  //
+  // The slice methods_ [0, virtual_methods_offset_) are the direct (static, private, <init>)
+  // methods declared by this class.
+  //
+  // The slice methods_ [virtual_methods_offset_, copied_methods_offset_) are the virtual methods
+  // declared by this class.
+  //
+  // The slice methods_ [copied_methods_offset_, |methods_|) are the methods copied from
+  // interfaces, such as miranda or default methods. These are copied for resolution purposes, as
+  // this class is where they are (logically) declared as far as virtual dispatch is concerned.
+  uint64_t methods_;
+
   // Static fields length-prefixed array.
   uint64_t sfields_;
 
-  // Virtual methods defined in this class; invoked through vtable. Pointer to an ArtMethod
-  // length-prefixed array.
-  uint64_t virtual_methods_;
-
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
 
@@ -1317,6 +1394,14 @@
   // State of class initialization.
   Status status_;
 
+  // The offset of the first virtual method that is copied from an interface. This includes miranda,
+  // default, and default-conflict methods. Having a hard limit of ((1 << 16) - 1) for methods
+  // defined on a single class is well established in Java, so we will use only uint16_t's here.
+  uint16_t copied_methods_offset_;
+
+  // The offset of the first declared virtual method in the methods_ array.
+  uint16_t virtual_methods_offset_;
+
   // TODO: ?
   // initiating class loader list
   // NOTE: for classes with low serialNumber, these are unused, and the
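
To make the new field layout concrete, here is a small worked sketch (with hypothetical counts) of how the two uint16_t offsets partition methods_, matching the NumDirectMethods/NumDeclaredVirtualMethods/NumVirtualMethods arithmetic in class-inl.h:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical class: 4 direct, 3 declared virtual, 2 copied methods.
      uint32_t num_methods = 9;             // |methods_|
      uint16_t virtual_methods_offset = 4;  // index of first declared virtual method
      uint16_t copied_methods_offset = 7;   // index of first copied method

      uint32_t num_direct = virtual_methods_offset;                 // 4
      uint32_t num_declared_virtual =
          copied_methods_offset - virtual_methods_offset;           // 3
      uint32_t num_virtual = num_methods - virtual_methods_offset;  // 5 (declared + copied)
      uint32_t num_copied = num_methods - copied_methods_offset;    // 2

      printf("direct=%u declared_virtual=%u virtual=%u copied=%u\n",
             num_direct, num_declared_virtual, num_virtual, num_copied);
      return 0;
    }
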
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5e42392..1977481 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -439,16 +439,9 @@
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis));
   size_t num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
     auto modifiers = m.GetAccessFlags();
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      ++num_methods;
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
+    // Add non-constructor declared methods.
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
       ++num_methods;
@@ -457,22 +450,9 @@
   auto ret = hs.NewHandle(mirror::ObjectArray<mirror::Method>::Alloc(
       soa.Self(), mirror::Method::ArrayClass(), num_methods));
   num_methods = 0;
-  for (auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : klass->GetDeclaredMethods(sizeof(void*))) {
     auto modifiers = m.GetAccessFlags();
     if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
-        (modifiers & kAccMiranda) == 0) {
-      auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
-      if (method == nullptr) {
-        soa.Self()->AssertPendingException();
-        return nullptr;
-      }
-      ret->SetWithoutChecks<false>(num_methods++, method);
-    }
-  }
-  for (auto& m : klass->GetDirectMethods(sizeof(void*))) {
-    auto modifiers = m.GetAccessFlags();
-    // Add non-constructor direct/static methods.
-    if ((publicOnly == JNI_FALSE || (modifiers & kAccPublic) != 0) &&
         (modifiers & kAccConstructor) == 0) {
       auto* method = mirror::Method::CreateFromArtMethod(soa.Self(), &m);
       if (method == nullptr) {
@@ -673,7 +653,8 @@
     }
   }
   auto* constructor = klass->GetDeclaredConstructor(
-      soa.Self(), NullHandle<mirror::ObjectArray<mirror::Class>>());
+      soa.Self(),
+      ScopedNullHandle<mirror::ObjectArray<mirror::Class>>());
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index 4a1e6c2..f42a17d 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -68,7 +68,8 @@
 }
 
 static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader,
-                                  jstring javaLdLibraryPath, jstring javaIsolationPath) {
+                                  jboolean isSharedNamespace, jstring javaLibrarySearchPath,
+                                  jstring javaLibraryPermittedPath) {
   ScopedUtfChars filename(env, javaFilename);
   if (filename.c_str() == nullptr) {
     return nullptr;
@@ -80,14 +81,19 @@
   // linker namespace instead of global LD_LIBRARY_PATH
   // (23 is Marshmallow)
   if (target_sdk_version <= INT_MAX) {
-    SetLdLibraryPath(env, javaLdLibraryPath);
+    SetLdLibraryPath(env, javaLibrarySearchPath);
   }
 
   std::string error_msg;
   {
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-    bool success = vm->LoadNativeLibrary(env, filename.c_str(), javaLoader,
-                                         javaLdLibraryPath, javaIsolationPath, &error_msg);
+    bool success = vm->LoadNativeLibrary(env,
+                                         filename.c_str(),
+                                         javaLoader,
+                                         isSharedNamespace == JNI_TRUE,
+                                         javaLibrarySearchPath,
+                                         javaLibraryPermittedPath,
+                                         &error_msg);
     if (success) {
       return nullptr;
     }
@@ -115,7 +121,7 @@
   NATIVE_METHOD(Runtime, gc, "()V"),
   NATIVE_METHOD(Runtime, maxMemory, "!()J"),
   NATIVE_METHOD(Runtime, nativeExit, "(I)V"),
-  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
+  NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;ZLjava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
   NATIVE_METHOD(Runtime, totalMemory, "!()J"),
 };
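
As a cross-check on the updated descriptor, each element maps to one native parameter after the implicit JNIEnv* and jclass; the new 'Z' slot is the jboolean. A hedged restatement of the mapping, matching the signature shown in the hunk above:

    #include <jni.h>

    // "(Ljava/lang/String;Ljava/lang/ClassLoader;ZLjava/lang/String;Ljava/lang/String;)Ljava/lang/String;"
    //   Ljava/lang/String;      -> jstring  javaFilename
    //   Ljava/lang/ClassLoader; -> jobject  javaLoader
    //   Z                       -> jboolean isSharedNamespace   (the new parameter)
    //   Ljava/lang/String;      -> jstring  javaLibrarySearchPath
    //   Ljava/lang/String;      -> jstring  javaLibraryPermittedPath
    // Return Ljava/lang/String; -> jstring  (an error message, or null on success).
    static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename,
                                      jobject javaLoader, jboolean isSharedNamespace,
                                      jstring javaLibrarySearchPath,
                                      jstring javaLibraryPermittedPath);
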
 
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index c75ff78..13edd67 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -89,7 +89,7 @@
     case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
     case kWaitingForMethodTracingStart:   return kJavaWaiting;
     case kWaitingForVisitObjects:         return kJavaWaiting;
-    case kWaitingWeakGcRootRead:          return kJavaWaiting;
+    case kWaitingWeakGcRootRead:          return kJavaRunnable;
     case kWaitingForGcThreadFlip:         return kJavaWaiting;
     case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index caacba6..d7cf62e 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -71,7 +71,7 @@
     mirror::Class* klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
-    for (const auto& m : klass->GetVirtualMethods(sizeof(void*))) {
+    for (const auto& m : klass->GetDeclaredVirtualMethods(sizeof(void*))) {
       if (&m == method) {
         throws_index = i;
         break;
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 46cc5aa..61a1085 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -45,10 +45,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t native_method_count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    native_method_count += m.IsNative() ? 1u : 0u;
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     native_method_count += m.IsNative() ? 1u : 0u;
   }
   return native_method_count;
@@ -63,19 +60,7 @@
   mirror::Class* c = soa.Decode<mirror::Class*>(clazz);
 
   uint32_t count = 0;
-  for (auto& m : c->GetDirectMethods(sizeof(void*))) {
-    if (m.IsNative()) {
-      if (count < method_count) {
-        methods[count].name = m.GetName();
-        methods[count].signature = m.GetShorty();
-        methods[count].fnPtr = m.GetEntryPointFromJni();
-        count++;
-      } else {
-        LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(&m);
-      }
-    }
-  }
-  for (auto& m : c->GetVirtualMethods(sizeof(void*))) {
+  for (auto& m : c->GetMethods(sizeof(void*))) {
     if (m.IsNative()) {
       if (count < method_count) {
         methods[count].name = m.GetName();
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index 06b40fd..c32d76c 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -18,6 +18,8 @@
 
 #include <memory>
 
+#include "arch/instruction_set.h"
+#include "base/stringprintf.h"
 #include "common_runtime_test.h"
 
 namespace art {
@@ -113,6 +115,40 @@
   EXPECT_TRUE(map.Exists(Opt::GcOption));
 
   XGcOption xgc = map.GetOrDefault(Opt::GcOption);
-  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);}
+  EXPECT_EQ(gc::kCollectorTypeMC, xgc.collector_type_);
+}
+
+TEST_F(ParsedOptionsTest, ParsedOptionsInstructionSet) {
+  using Opt = RuntimeArgumentMap;
+
+  {
+    // Nothing set, should be kRuntimeISA.
+    RuntimeOptions options;
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(kRuntimeISA, isa);
+  }
+
+  const char* isa_strings[] = { "arm", "arm64", "x86", "x86_64", "mips", "mips64" };
+  InstructionSet ISAs[] = { InstructionSet::kArm,
+                            InstructionSet::kArm64,
+                            InstructionSet::kX86,
+                            InstructionSet::kX86_64,
+                            InstructionSet::kMips,
+                            InstructionSet::kMips64 };
+  static_assert(arraysize(isa_strings) == arraysize(ISAs), "Need same amount.");
+
+  for (size_t i = 0; i < arraysize(isa_strings); ++i) {
+    RuntimeOptions options;
+    options.push_back(std::make_pair("imageinstructionset", isa_strings[i]));
+    RuntimeArgumentMap map;
+    bool parsed = ParsedOptions::Parse(options, false, &map);
+    ASSERT_TRUE(parsed);
+    InstructionSet isa = map.GetOrDefault(Opt::ImageInstructionSet);
+    EXPECT_EQ(ISAs[i], isa);
+  }
+}
 
 }  // namespace art
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 57472ad..4d9ca6d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -79,7 +79,7 @@
             mirror::Method::CreateFromArtMethod(soa.Self(), method)));
     // Now adds all interfaces virtual methods.
     for (mirror::Class* interface : interfaces) {
-      for (auto& m : interface->GetVirtualMethods(sizeof(void*))) {
+      for (auto& m : interface->GetDeclaredVirtualMethods(sizeof(void*))) {
         soa.Env()->SetObjectArrayElement(
             proxyClassMethods, array_index++, soa.AddLocalReference<jobject>(
                 mirror::Method::CreateFromArtMethod(soa.Self(), &m)));
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b18d415..d241112 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1214,7 +1214,9 @@
   // the library that implements System.loadLibrary!
   {
     std::string error_msg;
-    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr, nullptr, nullptr, &error_msg)) {
+    if (!java_vm_->LoadNativeLibrary(env, "libjavacore.so", nullptr,
+                                     /* is_shared_namespace */ false,
+                                     nullptr, nullptr, &error_msg)) {
       LOG(FATAL) << "LoadNativeLibrary failed for \"libjavacore.so\": " << error_msg;
     }
   }
@@ -1295,6 +1297,11 @@
   GetInternTable()->DumpForSigQuit(os);
   GetJavaVM()->DumpForSigQuit(os);
   GetHeap()->DumpForSigQuit(os);
+  if (GetJit() != nullptr) {
+    GetJit()->DumpForSigQuit(os);
+  } else {
+    os << "Running non JIT\n";
+  }
   TrackedAllocators::Dump(os);
   os << "\n";
 
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 2b778d9..a47a08e 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -381,7 +381,7 @@
   ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime"));
   java_lang_Runtime_nativeLoad =
       CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad",
-                  "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)"
+                  "(Ljava/lang/String;Ljava/lang/ClassLoader;ZLjava/lang/String;Ljava/lang/String;)"
                       "Ljava/lang/String;");
 }
 
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 98251e4..cadf706 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -52,6 +52,11 @@
   int j;
 }
 
+class TestClass3 {
+  float floatField = 8.0f;
+  boolean test1 = true;
+}
+
 class Finalizable {
   static boolean sVisited = false;
   static final int VALUE = 0xbeef;
@@ -484,27 +489,32 @@
     return obj;
   }
 
-  /// CHECK-START: void Main.test21() load_store_elimination (before)
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
-  /// CHECK: StaticFieldSet
-  /// CHECK: StaticFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
 
-  /// CHECK-START: void Main.test21() load_store_elimination (after)
+  /// CHECK-START: void Main.test21(TestClass) load_store_elimination (after)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
-  /// CHECK: StaticFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldGet
 
   // Loop side effects can kill heap values, stores need to be kept in that case.
-  static void test21() {
+  static void test21(TestClass obj0) {
     TestClass obj = new TestClass();
+    obj0.str = "abc";
     obj.str = "abc";
     for (int i = 0; i < 2; i++) {
-      // Generate some loop side effect that does write.
-      obj.si = 1;
+      // Generate some loop side effect that writes into obj.
+      obj.str = "def";
     }
-    System.out.print(obj.str.substring(0, 0));
+    System.out.print(obj0.str.substring(0, 0) + obj.str.substring(0, 0));
   }
 
   /// CHECK-START: int Main.test22() load_store_elimination (before)
@@ -520,27 +530,29 @@
 
   /// CHECK-START: int Main.test22() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK: NewInstance
   /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
   /// CHECK: NewInstance
   /// CHECK-NOT: InstanceFieldSet
-  /// CHECK: InstanceFieldGet
+  /// CHECK-NOT: InstanceFieldGet
   /// CHECK-NOT: InstanceFieldGet
 
-  // Loop side effects only affects stores into singletons that dominiates the loop header.
+  // For a singleton, loop side effects can kill its field values only if:
+  // (1) it dominates the loop header, and
+  // (2) its fields are stored into inside a loop.
   static int test22() {
     int sum = 0;
     TestClass obj1 = new TestClass();
-    obj1.i = 2;       // This store can't be eliminated since it can be killed by loop side effects.
+    obj1.i = 2;    // This store can be eliminated since obj1's fields are never written inside a loop.
     for (int i = 0; i < 2; i++) {
       TestClass obj2 = new TestClass();
-      obj2.i = 3;    // This store can be eliminated since the singleton is inside the loop.
+      obj2.i = 3;  // This store can be eliminated since the singleton is inside the loop.
       sum += obj2.i;
     }
     TestClass obj3 = new TestClass();
-    obj3.i = 5;      // This store can be eliminated since the singleton is created after the loop.
+    obj3.i = 5;    // This store can be eliminated since the singleton is created after the loop.
     sum += obj1.i + obj3.i;
     return sum;
   }
@@ -577,6 +589,37 @@
     return obj.i;
   }
 
+  /// CHECK-START: float Main.test24() load_store_elimination (before)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<True>>]
+  /// CHECK-DAG:                       InstanceFieldSet [<<Obj>>,<<Float8>>]
+  /// CHECK-DAG:     <<GetTest:z\d+>>  InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:                       If [<<GetTest>>]
+  /// CHECK-DAG:     <<GetField:f\d+>> InstanceFieldGet [<<Obj>>]
+  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<GetField>>]
+  /// CHECK-DAG:                       Return [<<Phi>>]
+
+  /// CHECK-START: float Main.test24() load_store_elimination (after)
+  /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
+  /// CHECK-DAG:     <<Float8:f\d+>>   FloatConstant 8
+  /// CHECK-DAG:     <<Float42:f\d+>>  FloatConstant 42
+  /// CHECK-DAG:     <<Obj:l\d+>>      NewInstance
+  /// CHECK-DAG:                       If [<<True>>]
+  /// CHECK-DAG:     <<Phi:f\d+>>      Phi [<<Float42>>,<<Float8>>]
+  /// CHECK-DAG:                       Return [<<Phi>>]
+
+  static float test24() {
+    float a = 42.0f;
+    TestClass3 obj = new TestClass3();
+    if (obj.test1) {
+      a = obj.floatField;
+    }
+    return a;
+  }
+
   /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -679,10 +722,11 @@
     float[] fa2 = { 1.8f };
     assertFloatEquals(test19(fa1, fa2), 1.8f);
     assertFloatEquals(test20().i, 0);
-    test21();
+    test21(new TestClass());
     assertIntEquals(test22(), 13);
     assertIntEquals(test23(true), 4);
     assertIntEquals(test23(false), 5);
+    assertFloatEquals(test24(), 8.0f);
     testFinalizableByForcingGc();
   }
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 7589f8f..a616395 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -38,11 +38,13 @@
   $(HOST_OUT_EXECUTABLES)/jasmin \
   $(HOST_OUT_EXECUTABLES)/smali \
   $(HOST_OUT_EXECUTABLES)/dexmerger
+TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES :=
 
 ifeq ($(ANDROID_COMPILE_WITH_JACK),true)
   TEST_ART_RUN_TEST_DEPENDENCIES += \
     $(JACK) \
     $(JILL_JAR)
+  TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES += setup-jack-server
 endif
 
 ifeq ($(ART_TEST_DEBUG_GC),true)
@@ -63,7 +65,7 @@
     run_test_options += --quiet
   endif
 $$(dmart_target): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
+$$(dmart_target): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(TARGET_JACK_CLASSPATH_DEPENDENCIES) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) rm -rf $$(dir $$@) && mkdir -p $$(dir $$@)
 	$(hide) DX=$(abspath $(DX)) JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
 	  SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
@@ -896,7 +898,7 @@
 $$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
 $$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
 .PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule)
+$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  DX=$(abspath $(DX)) \
 	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index a2d2f23..b323456 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -172,10 +172,9 @@
   bug: 25437292
 },
 {
-  description: "JSR166TestCase.waitForThreadToEnterWaitState seems to time out; needs investigation.",
+  description: "Assertion failing on the concurrent collector configuration.",
   result: EXEC_FAILED,
-  names: ["jsr166.LinkedTransferQueueTest#testTransfer2",
-          "jsr166.LinkedTransferQueueTest#testWaitingConsumer"],
+  names: ["jsr166.LinkedTransferQueueTest#testTransfer2"],
   bug: 25883050
 }
 ]