Merge "Revert "Re-enable test that was causing TimeoutExceptions on ARM64.""
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 02bce41..2294ddb 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -114,8 +114,7 @@
 else
 ART_TARGET_CLANG := false
 endif
-# b/25130937
-ART_TARGET_CLANG_arm := false
+ART_TARGET_CLANG_arm :=
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
 ART_TARGET_CLANG_mips64 :=
@@ -335,7 +334,6 @@
 art_debug_cflags := \
   $(ART_DEBUG_OPT_FLAG) \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
-  -DVIXL_DEBUG \
   -UNDEBUG
 
 art_host_non_debug_cflags := $(art_non_debug_cflags)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b3832ac..33242f1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -517,7 +517,8 @@
 valgrind-$$(gtest_rule): $$(gtest_exe) $$(gtest_deps) $(ART_VALGRIND_DEPENDENCIES)
 	$(hide) $$(call ART_TEST_SKIP,$$@) && \
 	  VALGRIND_LIB=$(HOST_OUT)/lib64/valgrind \
-	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 $$< && \
+	  $(HOST_OUT_EXECUTABLES)/valgrind --leak-check=full --error-exitcode=1 \
+	    --suppressions=art/test/valgrind-suppressions.txt $$< && \
 	    $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
 
   ART_TEST_HOST_VALGRIND_GTEST$$($(2)ART_PHONY_TEST_HOST_SUFFIX)_RULES += valgrind-$$(gtest_rule)
@@ -573,7 +574,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,debug))
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
@@ -611,7 +612,7 @@
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
     LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS)
-    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld
+    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 7a257b6..11ee6dd 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -330,9 +330,9 @@
   # Vixl assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
     ifeq ($$(art_static_or_shared), static)
-      LOCAL_WHOLESTATIC_LIBRARIES += libvixld
+      LOCAL_WHOLESTATIC_LIBRARIES += libvixl
     else
-      LOCAL_SHARED_LIBRARIES += libvixld
+      LOCAL_SHARED_LIBRARIES += libvixl
     endif
   else
     ifeq ($$(art_static_or_shared), static)
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 239bc59..6075cd6 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -187,7 +187,9 @@
   }
 }
 
-void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa) {
+void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind,
+                                              InstructionSet isa,
+                                              size_t number_of_threads) {
   compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
                                             verification_results_.get(),
                                             method_inliner_map_.get(),
@@ -198,7 +200,7 @@
                                             GetImageClasses(),
                                             GetCompiledClasses(),
                                             GetCompiledMethods(),
-                                            /* thread_count */ 2,
+                                            number_of_threads,
                                             /* dump_stats */ true,
                                             /* dump_passes */ true,
                                             timer_.get(),
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7e0fbab..7c2c844 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -93,7 +93,7 @@
                             const char* method_name, const char* signature)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa);
+  void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads = 2U);
 
   void ReserveImageSpace();
 
@@ -122,6 +122,13 @@
     return; \
   }
 
+// TODO: When read barrier works with all tests, get rid of this.
+#define TEST_DISABLED_FOR_READ_BARRIER() \
+  if (kUseReadBarrier) { \
+    printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \
+    return; \
+  }
+
 // TODO: When read barrier works with all compilers in use, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kQuick) { \
diff --git a/compiler/debug/dwarf/debug_line_opcode_writer.h b/compiler/debug/dwarf/debug_line_opcode_writer.h
index 58502a3..b4a4d63 100644
--- a/compiler/debug/dwarf/debug_line_opcode_writer.h
+++ b/compiler/debug/dwarf/debug_line_opcode_writer.h
@@ -36,7 +36,7 @@
 
  public:
   static constexpr int kOpcodeBase = 13;
-  static constexpr bool kDefaultIsStmt = true;
+  static constexpr bool kDefaultIsStmt = false;
   static constexpr int kLineBase = -5;
   static constexpr int kLineRange = 14;
 
@@ -81,8 +81,11 @@
     this->PushUleb128(column);
   }
 
-  void NegateStmt() {
-    this->PushUint8(DW_LNS_negate_stmt);
+  void SetIsStmt(bool is_stmt) {
+    if (is_stmt_ != is_stmt) {
+      this->PushUint8(DW_LNS_negate_stmt);
+      is_stmt_ = is_stmt;
+    }
   }
 
   void SetBasicBlock() {
@@ -112,6 +115,7 @@
     current_address_ = 0;
     current_file_ = 1;
     current_line_ = 1;
+    is_stmt_ = kDefaultIsStmt;
   }
 
   // Uncoditionally set address using the long encoding.
@@ -227,7 +231,8 @@
         code_factor_bits_(codeFactorBits),
         current_address_(0),
         current_file_(1),
-        current_line_(1) {
+        current_line_(1),
+        is_stmt_(kDefaultIsStmt) {
   }
 
  private:
@@ -244,6 +249,7 @@
   uint64_t current_address_;
   int current_file_;
   int current_line_;
+  bool is_stmt_;
   std::vector<uintptr_t> patch_locations_;
 
   DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter);
diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc
index e455d0d..2ba3af5 100644
--- a/compiler/debug/dwarf/dwarf_test.cc
+++ b/compiler/debug/dwarf/dwarf_test.cc
@@ -217,7 +217,9 @@
   DW_CHECK_NEXT("Advance Line by 2 to 3");
   opcodes.SetColumn(4);
   DW_CHECK_NEXT("Set column to 4");
-  opcodes.NegateStmt();
+  opcodes.SetIsStmt(true);
+  DW_CHECK_NEXT("Set is_stmt to 1");
+  opcodes.SetIsStmt(false);
   DW_CHECK_NEXT("Set is_stmt to 0");
   opcodes.SetBasicBlock();
   DW_CHECK_NEXT("Set basic block");
diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h
index 41bfe79..e2f0a65 100644
--- a/compiler/debug/dwarf/dwarf_test.h
+++ b/compiler/debug/dwarf/dwarf_test.h
@@ -62,7 +62,7 @@
     InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
     ScratchFile file;
     FileOutputStream output_stream(file.GetFile());
-    ElfBuilder<ElfTypes> builder(isa, &output_stream);
+    ElfBuilder<ElfTypes> builder(isa, nullptr, &output_stream);
     builder.Start();
     if (!debug_info_data_.empty()) {
       builder.WriteSection(".debug_info", &debug_info_data_);
diff --git a/compiler/debug/elf_compilation_unit.h b/compiler/debug/elf_compilation_unit.h
index f725f45..b1d89eb 100644
--- a/compiler/debug/elf_compilation_unit.h
+++ b/compiler/debug/elf_compilation_unit.h
@@ -27,8 +27,9 @@
 struct ElfCompilationUnit {
   std::vector<const MethodDebugInfo*> methods;
   size_t debug_line_offset = 0;
-  uintptr_t low_pc = std::numeric_limits<uintptr_t>::max();
-  uintptr_t high_pc = 0;
+  bool is_code_address_text_relative = false;  // Are addresses offsets from the .text start?
+  uint64_t code_address = std::numeric_limits<uint64_t>::max();
+  uint64_t code_end = 0;
 };
 
 }  // namespace debug
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index f6d9b16..f9d33c1 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -175,18 +175,6 @@
   CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT || format == dwarf::DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
-  if (method_infos.empty()) {
-    return;
-  }
-
-  std::vector<uint32_t> binary_search_table;
-  std::vector<uintptr_t> patch_locations;
-  if (format == dwarf::DW_EH_FRAME_FORMAT) {
-    binary_search_table.reserve(2 * method_infos.size());
-  } else {
-    patch_locations.reserve(method_infos.size());
-  }
-
   // The methods can be written in any order.
   // Let's therefore sort them in the lexicographical order of the opcodes.
   // This has no effect on its own. However, if the final .debug_frame section is
@@ -194,17 +182,30 @@
   std::vector<const MethodDebugInfo*> sorted_method_infos;
   sorted_method_infos.reserve(method_infos.size());
   for (size_t i = 0; i < method_infos.size(); i++) {
-    sorted_method_infos.push_back(&method_infos[i]);
+    if (!method_infos[i].cfi.empty() && !method_infos[i].deduped) {
+      sorted_method_infos.push_back(&method_infos[i]);
+    }
   }
-  std::sort(
+  if (sorted_method_infos.empty()) {
+    return;
+  }
+  std::stable_sort(
       sorted_method_infos.begin(),
       sorted_method_infos.end(),
       [](const MethodDebugInfo* lhs, const MethodDebugInfo* rhs) {
-        ArrayRef<const uint8_t> l = lhs->compiled_method->GetCFIInfo();
-        ArrayRef<const uint8_t> r = rhs->compiled_method->GetCFIInfo();
+        ArrayRef<const uint8_t> l = lhs->cfi;
+        ArrayRef<const uint8_t> r = rhs->cfi;
         return std::lexicographical_compare(l.begin(), l.end(), r.begin(), r.end());
       });
 
+  std::vector<uint32_t> binary_search_table;
+  std::vector<uintptr_t> patch_locations;
+  if (format == dwarf::DW_EH_FRAME_FORMAT) {
+    binary_search_table.reserve(2 * sorted_method_infos.size());
+  } else {
+    patch_locations.reserve(sorted_method_infos.size());
+  }
+
   // Write .eh_frame/.debug_frame section.
   auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
@@ -212,9 +213,6 @@
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->Exists()
-        ? builder->GetText()->GetAddress()
-        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -224,25 +222,21 @@
     buffer_address += buffer.size();
     buffer.clear();
     for (const MethodDebugInfo* mi : sorted_method_infos) {
-      if (!mi->deduped) {  // Only one FDE per unique address.
-        ArrayRef<const uint8_t> opcodes = mi->compiled_method->GetCFIInfo();
-        if (!opcodes.empty()) {
-          const Elf_Addr code_address = text_address + mi->low_pc;
-          if (format == dwarf::DW_EH_FRAME_FORMAT) {
-            binary_search_table.push_back(
-                dchecked_integral_cast<uint32_t>(code_address));
-            binary_search_table.push_back(
-                dchecked_integral_cast<uint32_t>(buffer_address));
-          }
-          WriteFDE(is64bit, cfi_address, cie_address,
-                   code_address, mi->high_pc - mi->low_pc,
-                   opcodes, format, buffer_address, &buffer,
-                   &patch_locations);
-          cfi_section->WriteFully(buffer.data(), buffer.size());
-          buffer_address += buffer.size();
-          buffer.clear();
-        }
+      DCHECK(!mi->deduped);
+      DCHECK(!mi->cfi.empty());
+      const Elf_Addr code_address = mi->code_address +
+          (mi->is_code_address_text_relative ? builder->GetText()->GetAddress() : 0);
+      if (format == dwarf::DW_EH_FRAME_FORMAT) {
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(code_address));
+        binary_search_table.push_back(dchecked_integral_cast<uint32_t>(buffer_address));
       }
+      WriteFDE(is64bit, cfi_address, cie_address,
+               code_address, mi->code_size,
+               mi->cfi, format, buffer_address, &buffer,
+               &patch_locations);
+      cfi_section->WriteFully(buffer.data(), buffer.size());
+      buffer_address += buffer.size();
+      buffer.clear();
     }
     cfi_section->End();
   }
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index bddb054..a6e6f8b 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -46,6 +46,7 @@
 static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
   std::vector<const char*> names;
   if (mi->code_item != nullptr) {
+    DCHECK(mi->dex_file != nullptr);
     const uint8_t* stream = mi->dex_file->GetDebugInfoStream(mi->code_item);
     if (stream != nullptr) {
       DecodeUnsignedLeb128(&stream);  // line.
@@ -117,22 +118,23 @@
 
   void Write(const ElfCompilationUnit& compilation_unit) {
     CHECK(!compilation_unit.methods.empty());
-    const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+    const Elf_Addr base_address = compilation_unit.is_code_address_text_relative
         ? owner_->builder_->GetText()->GetAddress()
         : 0;
-    const uintptr_t cu_size = compilation_unit.high_pc - compilation_unit.low_pc;
+    const uint64_t cu_size = compilation_unit.code_end - compilation_unit.code_address;
     using namespace dwarf;  // NOLINT. For easy access to DWARF constants.
 
     info_.StartTag(DW_TAG_compile_unit);
     info_.WriteString(DW_AT_producer, "Android dex2oat");
     info_.WriteData1(DW_AT_language, DW_LANG_Java);
     info_.WriteString(DW_AT_comp_dir, "$JAVA_SRC_ROOT");
-    info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc);
+    info_.WriteAddr(DW_AT_low_pc, base_address + compilation_unit.code_address);
     info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
     info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset);
 
     const char* last_dex_class_desc = nullptr;
     for (auto mi : compilation_unit.methods) {
+      DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
       const DexFile::CodeItem* dex_code = mi->code_item;
       const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index);
@@ -165,8 +167,8 @@
       int start_depth = info_.Depth();
       info_.StartTag(DW_TAG_subprogram);
       WriteName(dex->GetMethodName(dex_method));
-      info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc);
-      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc-mi->low_pc));
+      info_.WriteAddr(DW_AT_low_pc, base_address + mi->code_address);
+      info_.WriteUdata(DW_AT_high_pc, mi->code_size);
       std::vector<uint8_t> expr_buffer;
       Expression expr(&expr_buffer);
       expr.WriteOpCallFrameCfa();
@@ -176,8 +178,8 @@
       // Decode dex register locations for all stack maps.
       // It might be expensive, so do it just once and reuse the result.
       std::vector<DexRegisterMap> dex_reg_maps;
-      if (mi->IsFromOptimizingCompiler()) {
-        const CodeInfo code_info(mi->compiled_method->GetVmapTable().data());
+      if (mi->code_info != nullptr) {
+        const CodeInfo code_info(mi->code_info);
         StackMapEncoding encoding = code_info.ExtractEncoding();
         for (size_t s = 0; s < code_info.GetNumberOfStackMaps(); ++s) {
           const StackMap& stack_map = code_info.GetStackMapAt(s, encoding);
@@ -200,7 +202,7 @@
           // Write the stack location of the parameter.
           const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
           const bool is64bitValue = false;
-          WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.low_pc);
+          WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
         }
         arg_reg++;
         info_.EndTag();
@@ -219,7 +221,7 @@
           if (dex_code != nullptr) {
             // Write the stack location of the parameter.
             const uint32_t vreg = dex_code->registers_size_ - dex_code->ins_size_ + arg_reg;
-            WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.low_pc);
+            WriteRegLocation(mi, dex_reg_maps, vreg, is64bitValue, compilation_unit.code_address);
           }
           arg_reg += is64bitValue ? 2 : 1;
           info_.EndTag();
@@ -246,7 +248,7 @@
                              dex_reg_maps,
                              var.reg_,
                              is64bitValue,
-                             compilation_unit.low_pc,
+                             compilation_unit.code_address,
                              var.start_address_,
                              var.end_address_);
             info_.EndTag();
@@ -445,14 +447,14 @@
                         const std::vector<DexRegisterMap>& dex_register_maps,
                         uint16_t vreg,
                         bool is64bitValue,
-                        uint32_t compilation_unit_low_pc,
+                        uint64_t compilation_unit_code_address,
                         uint32_t dex_pc_low = 0,
                         uint32_t dex_pc_high = 0xFFFFFFFF) {
     WriteDebugLocEntry(method_info,
                        dex_register_maps,
                        vreg,
                        is64bitValue,
-                       compilation_unit_low_pc,
+                       compilation_unit_code_address,
                        dex_pc_low,
                        dex_pc_high,
                        owner_->builder_->GetIsa(),
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index d3859ca..66e135f 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_LINE_WRITER_H_
 
+#include <unordered_set>
 #include <vector>
 
 #include "compiled_method.h"
@@ -53,7 +54,7 @@
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(ElfCompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->Exists()
+    const Elf_Addr base_address = compilation_unit.is_code_address_text_relative
         ? builder_->GetText()->GetAddress()
         : 0;
 
@@ -81,48 +82,80 @@
       case kX86_64:
         break;
     }
+    std::unordered_set<uint64_t> seen_addresses(compilation_unit.methods.size());
     dwarf::DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
     for (const MethodDebugInfo* mi : compilation_unit.methods) {
       // Ignore function if we have already generated line table for the same address.
       // It would confuse the debugger and the DWARF specification forbids it.
-      if (mi->deduped) {
+      // We allow the line table for a method to be replicated in different compilation units.
+      // This ensures that each compilation unit contains the line table for all of its methods.
+      if (!seen_addresses.insert(mi->code_address).second) {
         continue;
       }
 
       uint32_t prologue_end = std::numeric_limits<uint32_t>::max();
-      ArrayRef<const SrcMapElem> pc2dex_map;
-      std::vector<SrcMapElem> pc2dex_map_from_stack_maps;
-      if (mi->IsFromOptimizingCompiler()) {
+      std::vector<SrcMapElem> pc2dex_map;
+      if (mi->code_info != nullptr) {
         // Use stack maps to create mapping table from pc to dex.
-        const CodeInfo code_info(mi->compiled_method->GetVmapTable().data());
+        const CodeInfo code_info(mi->code_info);
         const StackMapEncoding encoding = code_info.ExtractEncoding();
+        pc2dex_map.reserve(code_info.GetNumberOfStackMaps());
         for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
           StackMap stack_map = code_info.GetStackMapAt(s, encoding);
           DCHECK(stack_map.IsValid());
           const uint32_t pc = stack_map.GetNativePcOffset(encoding);
           const int32_t dex = stack_map.GetDexPc(encoding);
-          pc2dex_map_from_stack_maps.push_back({pc, dex});
+          pc2dex_map.push_back({pc, dex});
           if (stack_map.HasDexRegisterMap(encoding)) {
             // Guess that the first map with local variables is the end of prologue.
             prologue_end = std::min(prologue_end, pc);
           }
         }
-        std::sort(pc2dex_map_from_stack_maps.begin(),
-                  pc2dex_map_from_stack_maps.end());
-        pc2dex_map = ArrayRef<const SrcMapElem>(pc2dex_map_from_stack_maps);
-      } else {
-        // Use the mapping table provided by the quick compiler.
-        pc2dex_map = mi->compiled_method->GetSrcMappingTable();
-        prologue_end = 0;
+        std::sort(pc2dex_map.begin(), pc2dex_map.end());
       }
 
       if (pc2dex_map.empty()) {
         continue;
       }
 
-      Elf_Addr method_address = text_address + mi->low_pc;
+      // Compensate for compiler's off-by-one-instruction error.
+      //
+      // The compiler generates a stackmap with the PC *after* the branch instruction
+      // (because this is the PC which is easier to obtain when unwinding).
+      //
+      // However, the debugger is more clever and it will ask us for the line-number
+      // mapping at the location of the branch instruction (since the following
+      // instruction could belong to another line, this is the correct thing to do).
+      //
+      // So we really want to just decrement the PC by one instruction so that the
+      // branch instruction is covered as well. However, we do not know the size
+      // of the previous instruction, and we cannot subtract just a fixed amount
+      // (the debugger would trust us that the PC is valid; it might try to set a
+      // breakpoint there at some point, and setting a breakpoint mid-instruction
+      // would make the process crash in a spectacular way).
+      //
+      // Therefore, we say that the PC which the compiler gave us for the stackmap
+      // is the end of its associated address range, and we use the PC from the
+      // previous stack map as the start of the range. This ensures that the PC is
+      // valid and that the branch instruction is covered.
+      //
+      // This ensures we have correct line number mapping at call sites (which is
+      // important for backtraces), but there is nothing we can do for non-call
+      // sites (so stepping through optimized code in debugger is not possible).
+      //
+      // We do not adjust the stackmaps if the code was compiled as debuggable.
+      // In that case, the stackmaps should accurately cover all instructions.
+      if (!mi->is_native_debuggable) {
+        for (size_t i = pc2dex_map.size() - 1; i > 0; --i) {
+          pc2dex_map[i].from_ = pc2dex_map[i - 1].from_;
+        }
+        pc2dex_map[0].from_ = 0;
+      }
+
+      Elf_Addr method_address = base_address + mi->code_address;
 
       PositionInfos dex2line_map;
+      DCHECK(mi->dex_file != nullptr);
       const DexFile* dex = mi->dex_file;
       if (!dex->DecodeDebugPositionInfo(mi->code_item, PositionInfoCallback, &dex2line_map)) {
         continue;
@@ -184,6 +217,10 @@
 
       // Generate mapping opcodes from PC to Java lines.
       if (file_index != 0) {
+        // If the method was not compiled as native-debuggable, we still generate all available
+        // lines, but we try to prevent the debugger from stepping and setting breakpoints since
+        // the information is too inaccurate for that (breakpoints would be set after the calls).
+        const bool default_is_stmt = mi->is_native_debuggable;
         bool first = true;
         for (SrcMapElem pc2dex : pc2dex_map) {
           uint32_t pc = pc2dex.from_;
@@ -205,13 +242,14 @@
                 // Assume that any preceding code is prologue.
                 int first_line = dex2line_map.front().line_;
                 // Prologue is not a sensible place for a breakpoint.
-                opcodes.NegateStmt();
+                opcodes.SetIsStmt(false);
                 opcodes.AddRow(method_address, first_line);
-                opcodes.NegateStmt();
                 opcodes.SetPrologueEnd();
               }
+              opcodes.SetIsStmt(default_is_stmt);
               opcodes.AddRow(method_address + pc, line);
             } else if (line != opcodes.CurrentLine()) {
+              opcodes.SetIsStmt(default_is_stmt);
               opcodes.AddRow(method_address + pc, line);
             }
           }
@@ -221,7 +259,7 @@
         opcodes.AddRow(method_address, 0);
       }
 
-      opcodes.AdvancePC(text_address + mi->high_pc);
+      opcodes.AdvancePC(method_address + mi->code_size);
       opcodes.EndSequence();
     }
     std::vector<uint8_t> buffer;
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index c321b4b..2d4fff4 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -74,8 +74,8 @@
 }
 
 struct VariableLocation {
-  uint32_t low_pc;
-  uint32_t high_pc;
+  uint32_t low_pc;  // Relative to compilation unit.
+  uint32_t high_pc;  // Relative to compilation unit.
   DexRegisterLocation reg_lo;  // May be None if the location is unknown.
   DexRegisterLocation reg_hi;  // Most significant bits of 64-bit value.
 };
@@ -90,19 +90,23 @@
     const std::vector<DexRegisterMap>& dex_register_maps,
     uint16_t vreg,
     bool is64bitValue,
+    uint64_t compilation_unit_code_address,
     uint32_t dex_pc_low,
     uint32_t dex_pc_high) {
   std::vector<VariableLocation> variable_locations;
 
   // Get stack maps sorted by pc (they might not be sorted internally).
-  const CodeInfo code_info(method_info->compiled_method->GetVmapTable().data());
+  const CodeInfo code_info(method_info->code_info);
   const StackMapEncoding encoding = code_info.ExtractEncoding();
   std::map<uint32_t, uint32_t> stack_maps;  // low_pc -> stack_map_index.
   for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
     StackMap stack_map = code_info.GetStackMapAt(s, encoding);
     DCHECK(stack_map.IsValid());
-    const uint32_t low_pc = method_info->low_pc + stack_map.GetNativePcOffset(encoding);
-    DCHECK_LE(low_pc, method_info->high_pc);
+    const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding);
+    DCHECK_LE(pc_offset, method_info->code_size);
+    DCHECK_LE(compilation_unit_code_address, method_info->code_address);
+    const uint32_t low_pc = dchecked_integral_cast<uint32_t>(
+        method_info->code_address + pc_offset - compilation_unit_code_address);
     stack_maps.emplace(low_pc, s);
   }
 
@@ -113,8 +117,9 @@
     const StackMap& stack_map = code_info.GetStackMapAt(stack_map_index, encoding);
     auto next_it = it;
     next_it++;
-    const uint32_t high_pc = next_it != stack_maps.end() ? next_it->first
-                                                         : method_info->high_pc;
+    const uint32_t high_pc = next_it != stack_maps.end()
+      ? next_it->first
+      : method_info->code_address + method_info->code_size - compilation_unit_code_address;
     DCHECK_LE(low_pc, high_pc);
     if (low_pc == high_pc) {
       continue;  // Ignore if the address range is empty.
@@ -165,7 +170,7 @@
                                const std::vector<DexRegisterMap>& dex_register_maps,
                                uint16_t vreg,
                                bool is64bitValue,
-                               uint32_t compilation_unit_low_pc,
+                               uint64_t compilation_unit_code_address,
                                uint32_t dex_pc_low,
                                uint32_t dex_pc_high,
                                InstructionSet isa,
@@ -173,7 +178,7 @@
                                std::vector<uint8_t>* debug_loc_buffer,
                                std::vector<uint8_t>* debug_ranges_buffer) {
   using Kind = DexRegisterLocation::Kind;
-  if (!method_info->IsFromOptimizingCompiler()) {
+  if (method_info->code_info == nullptr || dex_register_maps.empty()) {
     return;
   }
 
@@ -182,6 +187,7 @@
       dex_register_maps,
       vreg,
       is64bitValue,
+      compilation_unit_code_address,
       dex_pc_low,
       dex_pc_high);
 
@@ -202,9 +208,8 @@
       const Kind kind = reg_loc.GetKind();
       const int32_t value = reg_loc.GetValue();
       if (kind == Kind::kInStack) {
-        const size_t frame_size = method_info->compiled_method->GetFrameSizeInBytes();
         // The stack offset is relative to SP. Make it relative to CFA.
-        expr.WriteOpFbreg(value - frame_size);
+        expr.WriteOpFbreg(value - method_info->frame_size_in_bytes);
         if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
             reg_hi.GetValue() == value + 4) {
           break;  // the high word is correctly implied by the low word.
@@ -249,11 +254,11 @@
 
     if (expr.size() > 0) {
       if (is64bit) {
-        debug_loc.PushUint64(variable_location.low_pc - compilation_unit_low_pc);
-        debug_loc.PushUint64(variable_location.high_pc - compilation_unit_low_pc);
+        debug_loc.PushUint64(variable_location.low_pc);
+        debug_loc.PushUint64(variable_location.high_pc);
       } else {
-        debug_loc.PushUint32(variable_location.low_pc - compilation_unit_low_pc);
-        debug_loc.PushUint32(variable_location.high_pc - compilation_unit_low_pc);
+        debug_loc.PushUint32(variable_location.low_pc);
+        debug_loc.PushUint32(variable_location.high_pc);
       }
       // Write the expression.
       debug_loc.PushUint16(expr.size());
@@ -283,11 +288,11 @@
       high_pc = variable_locations[++i].high_pc;
     }
     if (is64bit) {
-      debug_ranges.PushUint64(low_pc - compilation_unit_low_pc);
-      debug_ranges.PushUint64(high_pc - compilation_unit_low_pc);
+      debug_ranges.PushUint64(low_pc);
+      debug_ranges.PushUint64(high_pc);
     } else {
-      debug_ranges.PushUint32(low_pc - compilation_unit_low_pc);
-      debug_ranges.PushUint32(high_pc - compilation_unit_low_pc);
+      debug_ranges.PushUint32(low_pc);
+      debug_ranges.PushUint32(high_pc);
     }
   }
   // Write end-of-list entry.
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 01bd679..4dd8024 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -39,32 +39,31 @@
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     dwarf::CFIFormat cfi_format,
                     bool write_oat_patches) {
-  // Add methods to .symtab.
+  // Write .strtab and .symtab.
   WriteDebugSymbols(builder, method_infos, true /* with_signature */);
-  // Generate CFI (stack unwinding information).
-  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
-  // Write DWARF .debug_* sections.
-  WriteDebugSections(builder, method_infos, write_oat_patches);
-}
 
-template<typename ElfTypes>
-static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                               const ArrayRef<const MethodDebugInfo>& method_infos,
-                               bool write_oat_patches) {
+  // Write .debug_frame.
+  WriteCFISection(builder, method_infos, cfi_format, write_oat_patches);
+
   // Group the methods into compilation units based on source file.
   std::vector<ElfCompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
   for (const MethodDebugInfo& mi : method_infos) {
-    auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index);
-    const char* source_file = mi.dex_file->GetSourceFile(dex_class_def);
-    if (compilation_units.empty() || source_file != last_source_file) {
-      compilation_units.push_back(ElfCompilationUnit());
+    if (mi.dex_file != nullptr) {
+      auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index);
+      const char* source_file = mi.dex_file->GetSourceFile(dex_class_def);
+      if (compilation_units.empty() || source_file != last_source_file) {
+        compilation_units.push_back(ElfCompilationUnit());
+      }
+      ElfCompilationUnit& cu = compilation_units.back();
+      cu.methods.push_back(&mi);
+      // All methods must have the same addressing mode otherwise the min/max below does not work.
+      DCHECK_EQ(cu.methods.front()->is_code_address_text_relative, mi.is_code_address_text_relative);
+      cu.is_code_address_text_relative = mi.is_code_address_text_relative;
+      cu.code_address = std::min(cu.code_address, mi.code_address);
+      cu.code_end = std::max(cu.code_end, mi.code_address + mi.code_size);
+      last_source_file = source_file;
     }
-    ElfCompilationUnit& cu = compilation_units.back();
-    cu.methods.push_back(&mi);
-    cu.low_pc = std::min(cu.low_pc, mi.low_pc);
-    cu.high_pc = std::max(cu.high_pc, mi.high_pc);
-    last_source_file = source_file;
   }
 
   // Write .debug_line section.
@@ -91,28 +90,38 @@
 
 std::vector<uint8_t> MakeMiniDebugInfo(
     InstructionSet isa,
+    const InstructionSetFeatures* features,
     size_t rodata_size,
     size_t text_size,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
   if (Is64BitInstructionSet(isa)) {
-    return MakeMiniDebugInfoInternal<ElfTypes64>(isa, rodata_size, text_size, method_infos);
+    return MakeMiniDebugInfoInternal<ElfTypes64>(isa,
+                                                 features,
+                                                 rodata_size,
+                                                 text_size,
+                                                 method_infos);
   } else {
-    return MakeMiniDebugInfoInternal<ElfTypes32>(isa, rodata_size, text_size, method_infos);
+    return MakeMiniDebugInfoInternal<ElfTypes32>(isa,
+                                                 features,
+                                                 rodata_size,
+                                                 text_size,
+                                                 method_infos);
   }
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
-    const MethodDebugInfo& method_info) {
-  const InstructionSet isa = method_info.compiled_method->GetInstructionSet();
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodsInternal(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
   // No program headers since the ELF file is not linked and has no allocated sections.
   builder->Start(false /* write_program_headers */);
   WriteDebugInfo(builder.get(),
-                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 method_infos,
                  dwarf::DW_DEBUG_FRAME_FORMAT,
                  false /* write_oat_patches */);
   builder->End();
@@ -124,23 +133,27 @@
   return ArrayRef<const uint8_t>(result, buffer.size());
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const MethodDebugInfo& method_info) {
-  const InstructionSet isa = method_info.compiled_method->GetInstructionSet();
+ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos) {
   if (Is64BitInstructionSet(isa)) {
-    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+    return WriteDebugElfFileForMethodsInternal<ElfTypes64>(isa, features, method_infos);
   } else {
-    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+    return WriteDebugElfFileForMethodsInternal<ElfTypes32>(isa, features, method_infos);
   }
 }
 
 template <typename ElfTypes>
 static ArrayRef<const uint8_t> WriteDebugElfFileForClassesInternal(
-    const InstructionSet isa, const ArrayRef<mirror::Class*>& types)
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<mirror::Class*>& types)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   VectorOutputStream out("Debug ELF file", &buffer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
   // No program headers since the ELF file is not linked and has no allocated sections.
   builder->Start(false /* write_program_headers */);
   ElfDebugInfoWriter<ElfTypes> info_writer(builder.get());
@@ -158,15 +171,41 @@
   return ArrayRef<const uint8_t>(result, buffer.size());
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
+                                                    const InstructionSetFeatures* features,
                                                     const ArrayRef<mirror::Class*>& types) {
   if (Is64BitInstructionSet(isa)) {
-    return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, types);
+    return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, features, types);
   } else {
-    return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, types);
+    return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, features, types);
   }
 }
 
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& header) {
+  const struct { const char* name; uint32_t offset; } trampolines[] = {  // Kept in this order.
+    { "interpreterToInterpreterBridge", header.GetInterpreterToInterpreterBridgeOffset() },
+    { "interpreterToCompiledCodeBridge", header.GetInterpreterToCompiledCodeBridgeOffset() },
+    { "jniDlsymLookup", header.GetJniDlsymLookupOffset() },
+    { "quickGenericJniTrampoline", header.GetQuickGenericJniTrampolineOffset() },
+    { "quickImtConflictTrampoline", header.GetQuickImtConflictTrampolineOffset() },
+    { "quickResolutionTrampoline", header.GetQuickResolutionTrampolineOffset() },
+    { "quickToInterpreterBridge", header.GetQuickToInterpreterBridgeOffset() },
+  };
+  std::vector<MethodDebugInfo> result;
+  for (const auto& trampoline : trampolines) {
+    if (trampoline.offset != 0) {  // Zero offsets are skipped.
+      MethodDebugInfo info = MethodDebugInfo();
+      info.trampoline_name = trampoline.name;
+      info.isa = header.GetInstructionSet();
+      info.is_code_address_text_relative = true;
+      info.code_address = trampoline.offset - header.GetExecutableOffset();
+      info.code_size = 0;  // The symbol lasts until the next symbol.
+      result.push_back(std::move(info));
+    }
+  }
+  return result;
+}
+
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 103b501..736370e 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
 #define ART_COMPILER_DEBUG_ELF_DEBUG_WRITER_H_
 
+#include <vector>
+
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "debug/dwarf/dwarf_constants.h"
@@ -24,6 +26,7 @@
 #include "utils/array_ref.h"
 
 namespace art {
+class OatHeader;
 namespace mirror {
 class Class;
 }
@@ -31,22 +34,32 @@
 struct MethodDebugInfo;
 
 template <typename ElfTypes>
-void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
-                    const ArrayRef<const MethodDebugInfo>& method_infos,
-                    dwarf::CFIFormat cfi_format,
-                    bool write_oat_patches);
+void WriteDebugInfo(
+    ElfBuilder<ElfTypes>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    dwarf::CFIFormat cfi_format,
+    bool write_oat_patches);
 
-std::vector<uint8_t> MakeMiniDebugInfo(InstructionSet isa,
-                                       size_t rodata_section_size,
-                                       size_t text_section_size,
-                                       const ArrayRef<const MethodDebugInfo>& method_infos);
+std::vector<uint8_t> MakeMiniDebugInfo(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    size_t rodata_section_size,
+    size_t text_section_size,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const MethodDebugInfo& method_info);
+ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(const InstructionSet isa,
-                                                    const ArrayRef<mirror::Class*>& types)
+ArrayRef<const uint8_t> WriteDebugElfFileForClasses(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    const ArrayRef<mirror::Class*>& types)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+std::vector<MethodDebugInfo> MakeTrampolineInfos(const OatHeader& oat_header);
+
 }  // namespace debug
 }  // namespace art
 
diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h
index 5c7d1c7..fb63d62 100644
--- a/compiler/debug/elf_gnu_debugdata_writer.h
+++ b/compiler/debug/elf_gnu_debugdata_writer.h
@@ -79,13 +79,14 @@
 template <typename ElfTypes>
 static std::vector<uint8_t> MakeMiniDebugInfoInternal(
     InstructionSet isa,
+    const InstructionSetFeatures* features,
     size_t rodata_section_size,
     size_t text_section_size,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
   std::vector<uint8_t> buffer;
   buffer.reserve(KB);
   VectorOutputStream out("Mini-debug-info ELF file", &buffer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, features, &out));
   builder->Start();
   // Mirror .rodata and .text as NOBITS sections.
   // It is needed to detected relocations after compression.
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 41508f4..045eddd 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -39,7 +39,7 @@
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
                               const ArrayRef<const MethodDebugInfo>& method_infos,
                               bool with_signature) {
-  bool generated_mapping_symbol = false;
+  uint64_t mapping_symbol_address = std::numeric_limits<uint64_t>::max();
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
 
@@ -47,12 +47,12 @@
     return;
   }
 
-  // Find all addresses (low_pc) which contain deduped methods.
+  // Find all addresses which contain deduped methods.
   // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
+  std::unordered_set<uint64_t> deduped_addresses;
   for (const MethodDebugInfo& info : method_infos) {
     if (info.deduped) {
-      deduped_addresses.insert(info.low_pc);
+      deduped_addresses.insert(info.code_address);
     }
   }
 
@@ -64,40 +64,37 @@
     if (info.deduped) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
-    if (deduped_addresses.find(info.low_pc) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
+    size_t name_offset;
+    if (info.trampoline_name != nullptr) {
+      name_offset = strtab->Write(info.trampoline_name);
+    } else {
+      DCHECK(info.dex_file != nullptr);
+      std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
+      if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) {
+        name += " [DEDUPED]";
+      }
+      // If we write method names without signature, we might see the same name multiple times.
+      name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
+      last_name = std::move(name);
+      last_name_offset = name_offset;
     }
-    // If we write method names without signature, we might see the same name multiple times.
-    size_t name_offset = (name == last_name ? last_name_offset : strtab->Write(name));
 
-    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
-    const bool is_relative = (text != nullptr);
-    uint32_t low_pc = info.low_pc;
+    const auto* text = info.is_code_address_text_relative ? builder->GetText() : nullptr;
+    uint64_t address = info.code_address + (text != nullptr ? text->GetAddress() : 0);
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += info.compiled_method->CodeDelta();
-    symtab->Add(name_offset,
-                text,
-                low_pc,
-                is_relative,
-                info.high_pc - info.low_pc,
-                STB_GLOBAL,
-                STT_FUNC);
+    address += CompiledMethod::CodeDelta(info.isa);
+    symtab->Add(name_offset, text, address, info.code_size, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
-    if (info.compiled_method->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), text, info.low_pc & ~1,
-                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
+    if (info.isa == kThumb2) {
+      if (address < mapping_symbol_address || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), text, address & ~1, 0, STB_LOCAL, STT_NOTYPE);
+        mapping_symbol_address = address;
       }
     }
-
-    last_name = std::move(name);
-    last_name_offset = name_offset;
   }
   strtab->End();
 
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index 6b3dd8c..ed1da2c 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -24,22 +24,22 @@
 namespace debug {
 
 struct MethodDebugInfo {
-  const DexFile* dex_file;
+  const char* trampoline_name;
+  const DexFile* dex_file;  // Native methods (trampolines) do not reference dex file.
   size_t class_def_index;
   uint32_t dex_method_index;
   uint32_t access_flags;
   const DexFile::CodeItem* code_item;
+  InstructionSet isa;
   bool deduped;
-  uintptr_t low_pc;
-  uintptr_t high_pc;
-  CompiledMethod* compiled_method;
-
-  bool IsFromOptimizingCompiler() const {
-    return compiled_method->GetQuickCode().size() > 0 &&
-           compiled_method->GetVmapTable().size() > 0 &&
-           compiled_method->GetGcMap().size() == 0 &&
-           code_item != nullptr;
-  }
+  bool is_native_debuggable;
+  bool is_optimized;
+  bool is_code_address_text_relative;  // Is the address offset from start of .text section?
+  uint64_t code_address;
+  uint32_t code_size;
+  uint32_t frame_size_in_bytes;
+  const void* code_info;
+  ArrayRef<const uint8_t> cfi;
 };
 
 }  // namespace debug
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 8f5d3ae..48c4356 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -605,6 +605,13 @@
     INTRINSIC(JavaLangString, IndexOf, I_I, kIntrinsicIndexOf, kIntrinsicFlagBase0),
     INTRINSIC(JavaLangString, Length, _I, kIntrinsicIsEmptyOrLength, kIntrinsicFlagLength),
 
+    INTRINSIC(JavaLangStringFactory, NewStringFromBytes, ByteArrayIII_String,
+              kIntrinsicNewStringFromBytes, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangStringFactory, NewStringFromChars, IICharArray_String,
+              kIntrinsicNewStringFromChars, kIntrinsicFlagNone),
+    INTRINSIC(JavaLangStringFactory, NewStringFromString, String_String,
+              kIntrinsicNewStringFromString, kIntrinsicFlagNone),
+
     INTRINSIC(JavaLangThread, CurrentThread, _Thread, kIntrinsicCurrentThread, 0),
 
     INTRINSIC(LibcoreIoMemory, PeekByte, J_B, kIntrinsicPeek, kSignedByte),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index a220959..4db82a6 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -159,10 +159,16 @@
   size_t GetInlineDepthLimit() const {
     return inline_depth_limit_;
   }
+  void SetInlineDepthLimit(size_t limit) {
+    inline_depth_limit_ = limit;
+  }
 
   size_t GetInlineMaxCodeUnits() const {
     return inline_max_code_units_;
   }
+  void SetInlineMaxCodeUnits(size_t units) {
+    inline_max_code_units_ = units;
+  }
 
   double GetTopKProfileThreshold() const {
     return top_k_profile_threshold_;
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index f7da609..ef44a6f 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -20,6 +20,7 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "arch/mips/instruction_set_features_mips.h"
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/unix_file/fd_file.h"
@@ -38,6 +39,7 @@
 //   .rodata                     - DEX files and oat metadata.
 //   .text                       - Compiled code.
 //   .bss                        - Zero-initialized writeable section.
+//   .MIPS.abiflags              - MIPS specific section.
 //   .dynstr                     - Names for .dynsym.
 //   .dynsym                     - A few oat-specific dynamic symbols.
 //   .hash                       - Hash-table for .dynsym.
@@ -163,12 +165,6 @@
       }
     }
 
-    // Returns true if the section was written to disk.
-    // (Used to check whether we have .text when writing JIT debug info)
-    bool Exists() const {
-      return finished_;
-    }
-
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
       CHECK(started_);
@@ -362,16 +358,18 @@
     void Add(Elf_Word name,
              const Section* section,
              Elf_Addr addr,
-             bool is_relative,
              Elf_Word size,
              uint8_t binding,
-             uint8_t type,
-             uint8_t other = 0) {
-      DCHECK(section != nullptr || !is_relative);
-      Elf_Addr abs_addr = addr + (is_relative ? section->GetAddress() : 0);
-      Elf_Word section_index =
-          (section != nullptr) ? section->GetSectionIndex() : static_cast<Elf_Word>(SHN_ABS);
-      Add(name, section_index, abs_addr, size, binding, type, other);
+             uint8_t type) {
+      Elf_Word section_index;
+      if (section != nullptr) {
+        DCHECK_LE(section->GetAddress(), addr);
+        DCHECK_LE(addr, section->GetAddress() + section->GetSize());
+        section_index = section->GetSectionIndex();
+      } else {
+        section_index = static_cast<Elf_Word>(SHN_ABS);
+      }
+      Add(name, section_index, addr, size, binding, type);
     }
 
     void Add(Elf_Word name,
@@ -379,21 +377,90 @@
              Elf_Addr addr,
              Elf_Word size,
              uint8_t binding,
-             uint8_t type,
-             uint8_t other = 0) {
+             uint8_t type) {
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr;
       sym.st_size = size;
-      sym.st_other = other;
+      sym.st_other = 0;
       sym.st_shndx = section_index;
       sym.st_info = (binding << 4) + (type & 0xf);
       CachedSection::Add(&sym, sizeof(sym));
     }
   };
 
-  ElfBuilder(InstructionSet isa, OutputStream* output)
+  class AbiflagsSection FINAL : public Section {
+   public:
+    // Section with Mips abiflag info; the record stays zeroed unless isa is kMips or kMips64.
+    static constexpr uint8_t MIPS_AFL_REG_NONE =         0;  // no registers
+    static constexpr uint8_t MIPS_AFL_REG_32 =           1;  // 32-bit registers
+    static constexpr uint8_t MIPS_AFL_REG_64 =           2;  // 64-bit registers
+    static constexpr uint32_t MIPS_AFL_FLAGS1_ODDSPREG = 1;  // Uses odd single-prec fp regs
+    static constexpr uint8_t MIPS_ABI_FP_DOUBLE =        1;  // -mdouble-float
+    static constexpr uint8_t MIPS_ABI_FP_XX =            5;  // -mfpxx
+    static constexpr uint8_t MIPS_ABI_FP_64A =           7;  // -mips32r* -mfp64 -mno-odd-spreg
+
+    AbiflagsSection(ElfBuilder<ElfTypes>* owner,
+                    const std::string& name,
+                    Elf_Word type,
+                    Elf_Word flags,
+                    const Section* link,
+                    Elf_Word info,
+                    Elf_Word align,
+                    Elf_Word entsize,
+                    InstructionSet isa,
+                    const InstructionSetFeatures* features)
+        : Section(owner, name, type, flags, link, info, align, entsize) {
+      if (isa == kMips || isa == kMips64) {
+        bool fpu32 = false;    // assume mips64 values
+        uint8_t isa_rev = 6;   // assume mips64 values
+        if (isa == kMips) {
+          // adjust for mips32 values; only this path reads |features|.
+          DCHECK(features != nullptr);
+          const auto* mips = features->AsMipsInstructionSetFeatures();
+          fpu32 = mips->Is32BitFloatingPoint();
+          isa_rev = mips->IsR6() ? 6
+              : mips->IsMipsIsaRevGreaterThanEqual2() ? (fpu32 ? 2 : 5)
+              : 1;
+        }
+        abiflags_.version = 0;  // version of flags structure
+        abiflags_.isa_level = (isa == kMips) ? 32 : 64;
+        abiflags_.isa_rev = isa_rev;
+        abiflags_.gpr_size = (isa == kMips) ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64;
+        abiflags_.cpr1_size = fpu32 ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64;
+        abiflags_.cpr2_size = MIPS_AFL_REG_NONE;
+        // Set the fp_abi to MIPS_ABI_FP_64A for mips32 with 64-bit FPUs (ie: mips32 R5 and R6).
+        // Otherwise set to MIPS_ABI_FP_DOUBLE.
+        abiflags_.fp_abi = (isa == kMips && !fpu32) ? MIPS_ABI_FP_64A : MIPS_ABI_FP_DOUBLE;
+        abiflags_.isa_ext = 0;
+        abiflags_.ases = 0;
+        // To keep the code simple, we are not using odd FP reg for single floats for both
+        // mips32 and mips64 ART. Therefore we are not setting the MIPS_AFL_FLAGS1_ODDSPREG bit.
+        abiflags_.flags1 = 0;
+        abiflags_.flags2 = 0;
+      }
+    }
+
+    Elf_Word GetSize() const {
+      return sizeof(abiflags_);
+    }
+
+    void Write() {
+      this->WriteFully(&abiflags_, sizeof(abiflags_));
+    }
+
+   private:
+    struct {
+      uint16_t version;  // version of this structure
+      uint8_t  isa_level, isa_rev, gpr_size, cpr1_size, cpr2_size;
+      uint8_t  fp_abi;
+      uint32_t isa_ext, ases, flags1, flags2;
+    } abiflags_ = {};  // Zeroed so Write() never emits indeterminate bytes.
+  };
+
+  ElfBuilder(InstructionSet isa, const InstructionSetFeatures* features, OutputStream* output)
       : isa_(isa),
+        features_(features),
         stream_(output),
         rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
         text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
@@ -410,6 +477,8 @@
         debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
         shstrtab_(this, ".shstrtab", 0, 1),
+        abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
+                  isa, features),
         started_(false),
         write_program_headers_(false),
         loaded_size_(0u),
@@ -419,6 +488,7 @@
     dynamic_.phdr_flags_ = PF_R | PF_W;
     dynamic_.phdr_type_ = PT_DYNAMIC;
     eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
+    abiflags_.phdr_type_ = PT_MIPS_ABIFLAGS;
   }
   ~ElfBuilder() {}
 
@@ -520,7 +590,7 @@
     stream_.Flush();
 
     // The main ELF header.
-    Elf_Ehdr elf_header = MakeElfHeader(isa_);
+    Elf_Ehdr elf_header = MakeElfHeader(isa_, features_);
     elf_header.e_shoff = section_headers_offset;
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
@@ -564,7 +634,12 @@
     Elf_Word rodata_address = rodata_.GetAddress();
     Elf_Word text_address = RoundUp(rodata_address + rodata_size, kPageSize);
     Elf_Word bss_address = RoundUp(text_address + text_size, kPageSize);
-    Elf_Word dynstr_address = RoundUp(bss_address + bss_size, kPageSize);
+    Elf_Word abiflags_address = RoundUp(bss_address + bss_size, kPageSize);
+    Elf_Word abiflags_size = 0;
+    if (isa_ == kMips || isa_ == kMips64) {
+      abiflags_size = abiflags_.GetSize();
+    }
+    Elf_Word dynstr_address = RoundUp(abiflags_address + abiflags_size, kPageSize);
 
     // Cache .dynstr, .dynsym and .hash data.
     dynstr_.Add("");  // dynstr should start with empty string.
@@ -649,6 +724,12 @@
     return loaded_size_;
   }
 
+  void WriteMIPSabiflagsSection() {
+    abiflags_.Start();
+    abiflags_.Write();
+    abiflags_.End();
+  }
+
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
     return stream_.Good();
@@ -668,7 +749,7 @@
   }
 
  private:
-  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
+  static Elf_Ehdr MakeElfHeader(InstructionSet isa, const InstructionSetFeatures* features) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
       case kArm:
@@ -696,18 +777,20 @@
       case kMips: {
         elf_header.e_machine = EM_MIPS;
         elf_header.e_flags = (EF_MIPS_NOREORDER |
-                               EF_MIPS_PIC       |
-                               EF_MIPS_CPIC      |
-                               EF_MIPS_ABI_O32   |
-                               EF_MIPS_ARCH_32R2);
+                              EF_MIPS_PIC       |
+                              EF_MIPS_CPIC      |
+                              EF_MIPS_ABI_O32   |
+                              // ?: binds more loosely than |, so the arch choice needs parens.
+                              (features->AsMipsInstructionSetFeatures()->IsR6()
+                                   ? EF_MIPS_ARCH_32R6 : EF_MIPS_ARCH_32R2));
         break;
       }
       case kMips64: {
         elf_header.e_machine = EM_MIPS;
         elf_header.e_flags = (EF_MIPS_NOREORDER |
-                               EF_MIPS_PIC       |
-                               EF_MIPS_CPIC      |
-                               EF_MIPS_ARCH_64R6);
+                              EF_MIPS_PIC       |
+                              EF_MIPS_CPIC      |
+                              EF_MIPS_ARCH_64R6);
         break;
       }
       case kNone: {
@@ -818,6 +901,7 @@
   }
 
   InstructionSet isa_;
+  const InstructionSetFeatures* features_;
 
   ErrorDelayingOutputStream stream_;
 
@@ -836,6 +920,7 @@
   Section debug_info_;
   Section debug_line_;
   StringSection shstrtab_;
+  AbiflagsSection abiflags_;
   std::vector<std::unique_ptr<Section>> other_sections_;
 
   // List of used section in the order in which they were written.
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 19346ec..bed864b 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -51,10 +51,12 @@
 class DebugInfoTask : public Task {
  public:
   DebugInfoTask(InstructionSet isa,
+                const InstructionSetFeatures* features,
                 size_t rodata_section_size,
                 size_t text_section_size,
                 const ArrayRef<const debug::MethodDebugInfo>& method_infos)
       : isa_(isa),
+        instruction_set_features_(features),
         rodata_section_size_(rodata_section_size),
         text_section_size_(text_section_size),
         method_infos_(method_infos) {
@@ -62,6 +64,7 @@
 
   void Run(Thread*) {
     result_ = debug::MakeMiniDebugInfo(isa_,
+                                       instruction_set_features_,
                                        rodata_section_size_,
                                        text_section_size_,
                                        method_infos_);
@@ -73,6 +76,7 @@
 
  private:
   InstructionSet isa_;
+  const InstructionSetFeatures* instruction_set_features_;
   size_t rodata_section_size_;
   size_t text_section_size_;
   const ArrayRef<const debug::MethodDebugInfo>& method_infos_;
@@ -83,6 +87,7 @@
 class ElfWriterQuick FINAL : public ElfWriter {
  public:
   ElfWriterQuick(InstructionSet instruction_set,
+                 const InstructionSetFeatures* features,
                  const CompilerOptions* compiler_options,
                  File* elf_file);
   ~ElfWriterQuick();
@@ -107,6 +112,7 @@
                                std::vector<uint8_t>* buffer);
 
  private:
+  const InstructionSetFeatures* instruction_set_features_;
   const CompilerOptions* const compiler_options_;
   File* const elf_file_;
   size_t rodata_size_;
@@ -121,27 +127,36 @@
 };
 
 std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const InstructionSetFeatures* features,
                                                 const CompilerOptions* compiler_options,
                                                 File* elf_file) {
   if (Is64BitInstructionSet(instruction_set)) {
-    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set,
+                                                  features,
+                                                  compiler_options,
+                                                  elf_file);
   } else {
-    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set,
+                                                  features,
+                                                  compiler_options,
+                                                  elf_file);
   }
 }
 
 template <typename ElfTypes>
 ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const InstructionSetFeatures* features,
                                          const CompilerOptions* compiler_options,
                                          File* elf_file)
     : ElfWriter(),
+      instruction_set_features_(features),
       compiler_options_(compiler_options),
       elf_file_(elf_file),
       rodata_size_(0u),
       text_size_(0u),
       bss_size_(0u),
       output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
-      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, features, output_stream_.get())) {}
 
 template <typename ElfTypes>
 ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
@@ -195,6 +210,9 @@
   if (bss_size_ != 0u) {
     builder_->GetBss()->WriteNoBitsSection(bss_size_);
   }
+  if (builder_->GetIsa() == kMips || builder_->GetIsa() == kMips64) {
+    builder_->WriteMIPSabiflagsSection();
+  }
   builder_->WriteDynamicSection();
 }
 
@@ -205,7 +223,11 @@
     // Prepare the mini-debug-info in background while we do other I/O.
     Thread* self = Thread::Current();
     debug_info_task_ = std::unique_ptr<DebugInfoTask>(
-        new DebugInfoTask(builder_->GetIsa(), rodata_size_, text_size_, method_infos));
+        new DebugInfoTask(builder_->GetIsa(),
+                          instruction_set_features_,
+                          rodata_size_,
+                          text_size_,
+                          method_infos));
     debug_info_thread_pool_ = std::unique_ptr<ThreadPool>(
         new ThreadPool("Mini-debug-info writer", 1));
     debug_info_thread_pool_->AddTask(self, debug_info_task_.get());
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 347d372..3d5dd39 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -26,8 +26,10 @@
 namespace art {
 
 class CompilerOptions;
+class InstructionSetFeatures;
 
 std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const InstructionSetFeatures* features,
                                                 const CompilerOptions* compiler_options,
                                                 File* elf_file);
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 5763cec..7779e44 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -24,6 +24,7 @@
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
 #include "debug/method_debug_info.h"
+#include "driver/compiler_options.h"
 #include "elf_writer.h"
 #include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
@@ -48,8 +49,12 @@
 };
 
 void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
-  // TODO: Test does not currently work with optimizing.
-  CreateCompilerDriver(Compiler::kQuick, kRuntimeISA);
+  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+
+  // Set inline filter values.
+  compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit);
+  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   // Enable write for dex2dex.
   for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
@@ -99,6 +104,7 @@
       const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
       std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
           compiler_driver_->GetInstructionSet(),
+          compiler_driver_->GetInstructionSetFeatures(),
           &compiler_driver_->GetCompilerOptions(),
           oat_file.GetFile());
       elf_writer->Start();
@@ -282,14 +288,17 @@
 }
 
 TEST_F(ImageTest, WriteReadUncompressed) {
+  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeUncompressed);
 }
 
 TEST_F(ImageTest, WriteReadLZ4) {
+  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4);
 }
 
 TEST_F(ImageTest, WriteReadLZ4HC) {
+  TEST_DISABLED_FOR_READ_BARRIER();  // b/27578460
   TestWriteRead(ImageHeader::kStorageModeLZ4HC);
 }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 871435b..b1b971f 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -266,17 +266,9 @@
                      << PrettyDuration(NanoTime() - compress_start_time);
     }
 
-    // Write header first, as uncompressed.
-    image_header->data_size_ = data_size;
-    if (!image_file->WriteFully(image_info.image_->Begin(), sizeof(ImageHeader))) {
-      PLOG(ERROR) << "Failed to write image file header " << image_filename;
-      image_file->Erase();
-      return false;
-    }
-
     // Write out the image + fields + methods.
     const bool is_compressed = compressed_data != nullptr;
-    if (!image_file->WriteFully(image_data_to_write, data_size)) {
+    if (!image_file->PwriteFully(image_data_to_write, data_size, sizeof(ImageHeader))) {
       PLOG(ERROR) << "Failed to write image file data " << image_filename;
       image_file->Erase();
       return false;
@@ -291,13 +283,33 @@
     if (!is_compressed) {
       CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset());
     }
-    if (!image_file->Write(reinterpret_cast<char*>(image_info.image_bitmap_->Begin()),
-                           bitmap_section.Size(),
-                           bitmap_position_in_file)) {
+    if (!image_file->PwriteFully(reinterpret_cast<char*>(image_info.image_bitmap_->Begin()),
+                                 bitmap_section.Size(),
+                                 bitmap_position_in_file)) {
       PLOG(ERROR) << "Failed to write image file " << image_filename;
       image_file->Erase();
       return false;
     }
+
+    int err = image_file->Flush();
+    if (err < 0) {
+      PLOG(ERROR) << "Failed to flush image file " << image_filename << " with result " << err;
+      image_file->Erase();
+      return false;
+    }
+
+    // Write header last in case the compiler gets killed in the middle of image writing.
+    // We do not want to have a corrupted image with a valid header.
+    // The header is uncompressed since it contains whether the image is compressed or not.
+    image_header->data_size_ = data_size;
+    if (!image_file->PwriteFully(reinterpret_cast<char*>(image_info.image_->Begin()),
+                                 sizeof(ImageHeader),
+                                 0)) {
+      PLOG(ERROR) << "Failed to write image file header " << image_filename;
+      image_file->Erase();
+      return false;
+    }
+
     CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(),
              static_cast<size_t>(image_file->GetLength()));
     if (image_file->FlushCloseOrErase() != 0) {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6ff1e2e..cda2e27 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -69,7 +69,8 @@
   DCHECK(jit_compiler != nullptr);
   if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {
     const ArrayRef<mirror::Class*> types_array(types, count);
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForClasses(kRuntimeISA, types_array);
+    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
+        kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array);
     CreateJITCodeEntry(std::unique_ptr<const uint8_t[]>(elf_file.data()), elf_file.size());
   }
 }
@@ -168,13 +169,14 @@
   compiler_driver_->SetDedupeEnabled(false);
   compiler_driver_->SetSupportBootImageFixup(false);
 
+  size_t thread_count = compiler_driver_->GetThreadCount();
   if (compiler_options_->GetGenerateDebugInfo()) {
 #ifdef __ANDROID__
     const char* prefix = "/data/misc/trace";
 #else
     const char* prefix = "/tmp";
 #endif
-    DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u)
+    DCHECK_EQ(thread_count, 1u)
         << "Generating debug info only works with one compiler thread";
     std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
     perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
@@ -183,6 +185,10 @@
                     " Are you on a user build? Perf only works on userdebug/eng builds";
     }
   }
+
+  size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
+  DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max())
+      << "ProfilingInfo's inline counter can potentially overflow";
 }
 
 JitCompiler::~JitCompiler() {
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index bc134fe..533dccf 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -42,6 +42,9 @@
   CompilerOptions* GetCompilerOptions() const {
     return compiler_options_.get();
   }
+  CompilerDriver* GetCompilerDriver() const {
+    return compiler_driver_.get();
+  }
 
  private:
   std::unique_ptr<CompilerOptions> compiler_options_;
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 14fd105..d22044a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -176,6 +176,7 @@
                   bool verify) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
+        compiler_driver_->GetInstructionSetFeatures(),
         &compiler_driver_->GetCompilerOptions(),
         file);
     elf_writer->Start();
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c60b02a..c2f19c9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -806,20 +806,29 @@
         }
       }
 
-      if (writer_->compiler_driver_->GetCompilerOptions().GenerateAnyDebugInfo()) {
+      const CompilerOptions& compiler_options = writer_->compiler_driver_->GetCompilerOptions();
+      // Exclude quickened dex methods (code_size == 0) since they have no native code.
+      if (compiler_options.GenerateAnyDebugInfo() && code_size != 0) {
+        bool has_code_info = method_header->IsOptimized();
         // Record debug information for this function if we are doing that.
-        const uint32_t quick_code_start = quick_code_offset -
-            writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(debug::MethodDebugInfo {
-            dex_file_,
-            class_def_index_,
-            it.GetMemberIndex(),
-            it.GetMethodAccessFlags(),
-            it.GetMethodCodeItem(),
-            deduped,
-            quick_code_start,
-            quick_code_start + code_size,
-            compiled_method});
+        debug::MethodDebugInfo info = debug::MethodDebugInfo();
+        info.trampoline_name = nullptr;
+        info.dex_file = dex_file_;
+        info.class_def_index = class_def_index_;
+        info.dex_method_index = it.GetMemberIndex();
+        info.access_flags = it.GetMethodAccessFlags();
+        info.code_item = it.GetMethodCodeItem();
+        info.isa = compiled_method->GetInstructionSet();
+        info.deduped = deduped;
+        info.is_native_debuggable = compiler_options.GetNativeDebuggable();
+        info.is_optimized = method_header->IsOptimized();
+        info.is_code_address_text_relative = true;
+        info.code_address = code_offset - writer_->oat_header_->GetExecutableOffset();
+        info.code_size = code_size;
+        info.frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
+        info.code_info = has_code_info ? compiled_method->GetVmapTable().data() : nullptr;
+        info.cfi = compiled_method->GetCFIInfo();
+        writer_->method_info_.push_back(info);
       }
 
       if (kIsDebugBuild) {
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 74aab4e..5e7a4a3 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -202,6 +202,10 @@
 
   ~OatWriter();
 
+  void AddMethodDebugInfos(const std::vector<debug::MethodDebugInfo>& infos) {
+    method_info_.insert(method_info_.end(), infos.begin(), infos.end());
+  }
+
   ArrayRef<const debug::MethodDebugInfo> GetMethodDebugInfo() const {
     return ArrayRef<const debug::MethodDebugInfo>(method_info_);
   }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 967d156..af50363 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -543,8 +543,16 @@
   DCHECK(CheckTypeConsistency(instruction));
   LocationSummary* locations = instruction->GetLocations();
   if (!instruction->IsSuspendCheckEntry()) {
-    if (locations != nullptr && locations->CanCall()) {
-      MarkNotLeaf();
+    if (locations != nullptr) {
+      if (locations->CanCall()) {
+        MarkNotLeaf();
+      } else if (locations->Intrinsified() &&
+                 instruction->IsInvokeStaticOrDirect() &&
+                 !instruction->AsInvokeStaticOrDirect()->HasCurrentMethodInput()) {
+        // A static method call that has been fully intrinsified, and cannot call on the slow
+        // path or refer to the current method directly, no longer needs the current method.
+        return;
+      }
     }
     if (instruction->NeedsCurrentMethod()) {
       SetRequiresCurrentMethod();
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index aa9b01f..0b7fefa 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -5727,6 +5727,71 @@
   HandleBitwiseOperation(instruction);
 }
 
+
+void LocationsBuilderARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    ShifterOperand second_reg(second.AsRegister<Register>());
+    Register out_reg = out.AsRegister<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_reg, first_reg, second_reg);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_reg, first_reg, second_reg);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+    return;
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    Register first_low = first.AsRegisterPairLow<Register>();
+    Register first_high = first.AsRegisterPairHigh<Register>();
+    ShifterOperand second_low(second.AsRegisterPairLow<Register>());
+    ShifterOperand second_high(second.AsRegisterPairHigh<Register>());
+    Register out_low = out.AsRegisterPairLow<Register>();
+    Register out_high = out.AsRegisterPairHigh<Register>();
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ bic(out_low, first_low, second_low);
+        __ bic(out_high, first_high, second_high);
+        break;
+      case HInstruction::kOr:
+        __ orn(out_low, first_low, second_low);
+        __ orn(out_high, first_high, second_high);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+  }
+}
+
 void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) {
   // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier).
   if (value == 0xffffffffu) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 985dc05..89b9e2c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1862,7 +1862,7 @@
   HandleBinaryOp(instruction);
 }
 
-void LocationsBuilderARM64::VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instr) {
+void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -1871,8 +1871,7 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM64::VisitArm64BitwiseNegatedRight(
-    HArm64BitwiseNegatedRight* instr) {
+void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
   Register dst = OutputRegister(instr);
   Register lhs = InputRegisterAt(instr, 0);
   Register rhs = InputRegisterAt(instr, 1);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index b9638f2..4f1e90c 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -440,13 +440,13 @@
   void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
+
+  void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_arm64
-  void VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instruction) OVERRIDE {
-    StartAttributeStream("kind") << instruction->GetOpKind();
-  }
-
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
     if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3e3719e..d861e39 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -28,6 +28,8 @@
 #include "driver/dex_compilation_unit.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
@@ -220,6 +222,33 @@
   return index;
 }
 
+class ScopedProfilingInfoInlineUse {
+ public:
+  explicit ScopedProfilingInfoInlineUse(ArtMethod* method, Thread* self)
+      : method_(method),
+        self_(self),
+        // Fetch the profiling info ahead of using it. If it's null when fetching,
+        // we should not call JitCodeCache::DoneCompilerUse.
+        profiling_info_(
+            Runtime::Current()->GetJit()->GetCodeCache()->NotifyCompilerUse(method, self)) {
+  }
+
+  ~ScopedProfilingInfoInlineUse() {
+    if (profiling_info_ != nullptr) {
+      size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+      DCHECK_EQ(profiling_info_, method_->GetProfilingInfo(pointer_size));
+      Runtime::Current()->GetJit()->GetCodeCache()->DoneCompilerUse(method_, self_);
+    }
+  }
+
+  ProfilingInfo* GetProfilingInfo() const { return profiling_info_; }
+
+ private:
+  ArtMethod* const method_;
+  Thread* const self_;
+  ProfilingInfo* const profiling_info_;
+};
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -271,30 +300,32 @@
 
   // Check if we can use an inline cache.
   ArtMethod* caller = graph_->GetArtMethod();
-  size_t pointer_size = class_linker->GetImagePointerSize();
-  // Under JIT, we should always know the caller.
-  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
-  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
-    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
-    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
-    if (ic.IsUnitialized()) {
-      VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(method_index, caller_dex_file)
-                     << " is not hit and not inlined";
-      return false;
-    } else if (ic.IsMonomorphic()) {
-      MaybeRecordStat(kMonomorphicCall);
-      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
-    } else if (ic.IsPolymorphic()) {
-      MaybeRecordStat(kPolymorphicCall);
-      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
-    } else {
-      DCHECK(ic.IsMegamorphic());
-      VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(method_index, caller_dex_file)
-                     << " is megamorphic and not inlined";
-      MaybeRecordStat(kMegamorphicCall);
-      return false;
+  if (Runtime::Current()->UseJit()) {
+    // Under JIT, we should always know the caller.
+    DCHECK(caller != nullptr);
+    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
+    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+    if (profiling_info != nullptr) {
+      const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+      if (ic.IsUninitialized()) {
+        VLOG(compiler) << "Interface or virtual call to "
+                       << PrettyMethod(method_index, caller_dex_file)
+                       << " is not hit and not inlined";
+        return false;
+      } else if (ic.IsMonomorphic()) {
+        MaybeRecordStat(kMonomorphicCall);
+        return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+      } else if (ic.IsPolymorphic()) {
+        MaybeRecordStat(kPolymorphicCall);
+        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+      } else {
+        DCHECK(ic.IsMegamorphic());
+        VLOG(compiler) << "Interface or virtual call to "
+                       << PrettyMethod(method_index, caller_dex_file)
+                       << " is megamorphic and not inlined";
+        MaybeRecordStat(kMegamorphicCall);
+        return false;
+      }
     }
   }
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index f8a9a94..049901b 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -70,6 +70,10 @@
   void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
   void VisitLessThan(HLessThan* condition) OVERRIDE;
   void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
+  void VisitBelow(HBelow* condition) OVERRIDE;
+  void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE;
+  void VisitAbove(HAbove* condition) OVERRIDE;
+  void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE;
   void VisitDiv(HDiv* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
   void VisitNeg(HNeg* instruction) OVERRIDE;
@@ -94,6 +98,7 @@
   void SimplifyCompare(HInvoke* invoke, bool has_zero_op);
   void SimplifyIsNaN(HInvoke* invoke);
   void SimplifyFP2Int(HInvoke* invoke);
+  void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
@@ -558,6 +563,36 @@
   block->RemoveInstruction(check);
 }
 
+static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstruction* cond) {
+  HInstruction *lhs = cond->InputAt(0);
+  HInstruction *rhs = cond->InputAt(1);
+  switch (cond->GetKind()) {
+    case HInstruction::kEqual:
+      return new (arena) HEqual(rhs, lhs);
+    case HInstruction::kNotEqual:
+      return new (arena) HNotEqual(rhs, lhs);
+    case HInstruction::kLessThan:
+      return new (arena) HGreaterThan(rhs, lhs);
+    case HInstruction::kLessThanOrEqual:
+      return new (arena) HGreaterThanOrEqual(rhs, lhs);
+    case HInstruction::kGreaterThan:
+      return new (arena) HLessThan(rhs, lhs);
+    case HInstruction::kGreaterThanOrEqual:
+      return new (arena) HLessThanOrEqual(rhs, lhs);
+    case HInstruction::kBelow:
+      return new (arena) HAbove(rhs, lhs);
+    case HInstruction::kBelowOrEqual:
+      return new (arena) HAboveOrEqual(rhs, lhs);
+    case HInstruction::kAbove:
+      return new (arena) HBelow(rhs, lhs);
+    case HInstruction::kAboveOrEqual:
+      return new (arena) HBelowOrEqual(rhs, lhs);
+    default:
+      LOG(FATAL) << "Unknown ConditionType " << cond->GetKind();
+  }
+  return nullptr;
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
@@ -981,13 +1016,47 @@
   VisitCondition(condition);
 }
 
-// TODO: unsigned comparisons too?
+void InstructionSimplifierVisitor::VisitBelow(HBelow* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitBelowOrEqual(HBelowOrEqual* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAbove(HAbove* condition) {
+  VisitCondition(condition);
+}
+
+void InstructionSimplifierVisitor::VisitAboveOrEqual(HAboveOrEqual* condition) {
+  VisitCondition(condition);
+}
 
 void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) {
-  // Try to fold an HCompare into this HCondition.
+  // Reverse condition if left is constant. Our code generators prefer constant
+  // on the right hand side.
+  if (condition->GetLeft()->IsConstant() && !condition->GetRight()->IsConstant()) {
+    HBasicBlock* block = condition->GetBlock();
+    HCondition* replacement = GetOppositeConditionSwapOps(block->GetGraph()->GetArena(), condition);
+    // If it is a floating-point comparison, we must set the opposite bias.
+    if (replacement != nullptr) {
+      if (condition->IsLtBias()) {
+        replacement->SetBias(ComparisonBias::kGtBias);
+      } else if (condition->IsGtBias()) {
+        replacement->SetBias(ComparisonBias::kLtBias);
+      }
+      block->ReplaceAndRemoveInstructionWith(condition, replacement);
+      RecordSimplification();
+
+      condition = replacement;
+    }
+  }
 
   HInstruction* left = condition->GetLeft();
   HInstruction* right = condition->GetRight();
+
+  // Try to fold an HCompare into this HCondition.
+
   // We can only replace an HCondition which compares a Compare to 0.
   // Both 'dx' and 'jack' generate a compare to 0 when compiling a
   // condition with a long, float or double comparison as input.
@@ -1594,6 +1663,12 @@
   invoke->ReplaceWithExceptInReplacementAtIndex(select, 0);  // false at index 0
 }
 
+void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
+  uint32_t dex_pc = invoke->GetDexPc();
+  HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
+}
+
 void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
   switch (instruction->GetIntrinsic()) {
     case Intrinsics::kStringEquals:
@@ -1626,6 +1701,15 @@
     case Intrinsics::kDoubleDoubleToLongBits:
       SimplifyFP2Int(instruction);
       break;
+    case Intrinsics::kUnsafeLoadFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
+      break;
+    case Intrinsics::kUnsafeStoreFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyStore);
+      break;
+    case Intrinsics::kUnsafeFullFence:
+      SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny);
+      break;
     default:
       break;
   }
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index db1f9a7..cd026b8 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -26,5 +26,18 @@
   }
 }
 
+void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) {
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
+}
+
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 379b95d..14c940e 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -36,6 +36,8 @@
   }
 
   void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
+  void VisitAnd(HAnd* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index c2bbdcc..f00d960 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -180,51 +180,10 @@
   return true;
 }
 
-bool InstructionSimplifierArm64Visitor::TryMergeNegatedInput(HBinaryOperation* op) {
-  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
-  HInstruction* left = op->GetLeft();
-  HInstruction* right = op->GetRight();
-
-  // Only consider the case where there is exactly one Not, with 2 Not's De
-  // Morgan's laws should be applied instead.
-  if (left->IsNot() ^ right->IsNot()) {
-    HInstruction* hnot = (left->IsNot() ? left : right);
-    HInstruction* hother = (left->IsNot() ? right : left);
-
-    // Only do the simplification if the Not has only one use and can thus be
-    // safely removed. Even though ARM64 negated bitwise operations do not have
-    // an immediate variant (only register), we still do the simplification when
-    // `hother` is a constant, because it removes an instruction if the constant
-    // cannot be encoded as an immediate:
-    //   mov r0, #large_constant
-    //   neg r2, r1
-    //   and r0, r0, r2
-    // becomes:
-    //   mov r0, #large_constant
-    //   bic r0, r0, r1
-    if (hnot->HasOnlyOneNonEnvironmentUse()) {
-      // Replace code looking like
-      //    NOT tmp, mask
-      //    AND dst, src, tmp   (respectively ORR, EOR)
-      // with
-      //    BIC dst, src, mask  (respectively ORN, EON)
-      HInstruction* src = hnot->AsNot()->GetInput();
-
-      HArm64BitwiseNegatedRight* neg_op = new (GetGraph()->GetArena())
-          HArm64BitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
-
-      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
-      hnot->GetBlock()->RemoveInstruction(hnot);
-      RecordSimplification();
-      return true;
-    }
-  }
-
-  return false;
-}
-
 void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
@@ -248,7 +207,9 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
@@ -284,7 +245,9 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) {
-  TryMergeNegatedInput(instruction);
+  if (TryMergeNegatedInput(instruction)) {
+    RecordSimplification();
+  }
 }
 
 }  // namespace arm64
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index cf84587..338120b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,10 +51,6 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
-  // For bitwise operations (And/Or/Xor) with a negated input, try to use
-  // a negated bitwise instruction.
-  bool TryMergeNegatedInput(HBinaryOperation* op);
-
   // HInstruction visitors, sorted alphabetically.
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 45d196f..a11b5bd 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -186,4 +186,47 @@
   return false;
 }
 
+
+bool TryMergeNegatedInput(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // Only consider the case where exactly one input is a Not; when both are,
+  // De Morgan's laws should be applied instead.
+  if (left->IsNot() ^ right->IsNot()) {
+    HInstruction* hnot = (left->IsNot() ? left : right);
+    HInstruction* hother = (left->IsNot() ? right : left);
+
+    // Only do the simplification if the Not has a single non-environment use
+    // and can thus be safely removed. Even though ARM64 negated bitwise
+    // operations do not have an immediate variant (only register), we still do
+    // the simplification when `hother` is a constant, because it removes an
+    // instruction if the constant cannot be encoded as an immediate:
+    //   mov r0, #large_constant
+    //   mvn r2, r1
+    //   and r0, r0, r2
+    // becomes:
+    //   mov r0, #large_constant
+    //   bic r0, r0, r1
+    if (hnot->HasOnlyOneNonEnvironmentUse()) {
+      // Replace code looking like
+      //    NOT tmp, mask
+      //    AND dst, src, tmp   (respectively ORR, EOR)
+      // with
+      //    BIC dst, src, mask  (respectively ORN, EON)
+      HInstruction* src = hnot->AsNot()->GetInput();
+
+      HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetArena())
+          HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
+
+      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
+      hnot->GetBlock()->RemoveInstruction(hnot);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 9832ecc..b1fe8f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -22,6 +22,9 @@
 namespace art {
 
 bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa);
+// For bitwise operations (And/Or/Xor) with a negated input, try to use
+// a negated bitwise instruction.
+bool TryMergeNegatedInput(HBinaryOperation* op);
 
 }  // namespace art
 
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 0cec5cc..3da8285 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -231,7 +231,10 @@
 UNREACHABLE_INTRINSIC(Arch, IntegerCompare)         \
 UNREACHABLE_INTRINSIC(Arch, LongCompare)            \
 UNREACHABLE_INTRINSIC(Arch, IntegerSignum)          \
-UNREACHABLE_INTRINSIC(Arch, LongSignum)
+UNREACHABLE_INTRINSIC(Arch, LongSignum)             \
+UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence)        \
+UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence)       \
+UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence)
 
 }  // namespace art
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b599d42..4b94c94 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1224,8 +1224,9 @@
 
   __ LoadFromOffset(
       kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1251,8 +1252,9 @@
   // all include a null check on `data` before calling that method.
   __ LoadFromOffset(
       kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
@@ -1276,8 +1278,9 @@
 
   __ LoadFromOffset(kLoadWord,
       LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ blx(LR);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -2008,9 +2011,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(ARM, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(ARM, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(ARM, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(ARM)
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ccbbd43..5de2306 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1409,8 +1409,9 @@
 
   __ Ldr(lr,
       MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1436,19 +1437,17 @@
   // all include a null check on `data` before calling that method.
   __ Ldr(lr,
       MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
-  // The inputs plus one temp.
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
@@ -1464,8 +1463,9 @@
 
   __ Ldr(lr,
       MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   __ Blr(lr);
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1959,9 +1959,6 @@
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(ARM64)
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 697b8fe..c306cf9 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1457,6 +1457,24 @@
   }
 }
 
+// Thread java.lang.Thread.currentThread()
+void IntrinsicLocationsBuilderMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  Register out = invoke->GetLocations()->Out().AsRegister<Register>();
+
+  __ LoadFromOffset(kLoadWord,
+                    out,
+                    TR,
+                    Thread::PeerOffset<kMipsPointerSize>().Int32Value());
+}
+
 // char java.lang.String.charAt(int index)
 void IntrinsicLocationsBuilderMIPS::VisitStringCharAt(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1464,7 +1482,9 @@
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+  // The inputs are considered live at the last instruction and get restored. With
+  // kNoOutputOverlap that restore would overwrite the output, hence kOutputOverlap.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitStringCharAt(HInvoke* invoke) {
@@ -1503,6 +1523,40 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// int java.lang.String.compareTo(String anotherString)
+void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  Register argument = locations->InAt(1).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(argument, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize,
+                                            pStringCompareTo).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  __ Bind(slow_path->GetExitLabel());
+}
+
 // boolean java.lang.String.equals(Object anObject)
 void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
@@ -1605,6 +1659,211 @@
   __ Bind(&end);
 }
 
+static void GenerateStringIndexOf(HInvoke* invoke,
+                                  bool start_at_zero,
+                                  MipsAssembler* assembler,
+                                  CodeGeneratorMIPS* codegen,
+                                  ArenaAllocator* allocator) {
+  LocationSummary* locations = invoke->GetLocations();
+  Register tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<Register>() : TMP;
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we
+  // don't know statically, or directly dispatch if we have a constant.
+  SlowPathCodeMIPS* slow_path = nullptr;
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    if (!IsUint<16>(invoke->InputAt(1)->AsIntConstant()->GetValue())) {
+      // Always needs the slow-path. We could directly dispatch to it,
+      // but this case should be rare, so for simplicity just put the
+      // full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else {
+    Register char_reg = locations->InAt(1).AsRegister<Register>();
+    // The "bltu" conditional branch tests to see if the character value
+    // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then
+    // the character being searched for, if it exists in the string, is
+    // encoded using UTF-16 and stored in the string as two (16-bit)
+    // halfwords. Currently the assembly code used to implement this
+    // intrinsic doesn't support searching for a character stored as
+    // two halfwords so we fall back to using the generic implementation
+    // of indexOf().
+    __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max());
+    slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ Bltu(tmp_reg, char_reg, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    DCHECK_EQ(tmp_reg, A2);
+    // Start-index = 0.
+    __ Clear(tmp_reg);
+  }
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pIndexOf).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+
+  if (slow_path != nullptr) {  // nullptr when the char is a constant that fits in 16 bits.
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+// int java.lang.String.indexOf(int ch)
+void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+
+  // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke,
+                        /* start_at_zero */ true,
+                        GetAssembler(),
+                        codegen_,
+                        GetAllocator());
+}
+
+// int java.lang.String.indexOf(int ch, int fromIndex)
+void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime
+  // calling convention. So it's best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+
+  // Temp for the slow-path code point compare. NOTE(review): codegen uses TMP instead; confirm.
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateStringIndexOf(invoke,
+                        /* start_at_zero */ false,
+                        GetAssembler(),
+                        codegen_,
+                        GetAllocator());
+}
+
+// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register byte_array = locations->InAt(0).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(byte_array, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromBytes).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromChars).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+// java.lang.StringFactory.newStringFromString(String toCopy)
+void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+  SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ Beqz(string_to_copy, slow_path->GetEntryLabel());
+
+  __ LoadFromOffset(kLoadWord,
+                    T9,
+                    TR,
+                    QUICK_ENTRYPOINT_OFFSET(kMipsWordSize, pAllocStringFromString).Int32Value());
+  __ Jalr(T9);
+  __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 static void GenIsInfinite(LocationSummary* locations,
                           const Primitive::Type type,
                           const bool isR6,
@@ -1783,7 +2042,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundFloat)
-UNIMPLEMENTED_INTRINSIC(MIPS, ThreadCurrentThread)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGet)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetVolatile)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLong)
@@ -1802,12 +2060,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASInt)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASObject)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringCompareTo)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringIndexOfAfter)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringNewStringFromBytes)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringNewStringFromChars)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringNewStringFromString)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS, StringGetCharsNoCheck)
@@ -1838,9 +2090,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(MIPS)
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 83dff33..cf973aa 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1356,10 +1356,10 @@
   __ Beqzc(argument, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pStringCompareTo).Int32Value());
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1503,17 +1503,14 @@
     DCHECK_EQ(tmp_reg, A2);
     // Start-index = 0.
     __ Clear(tmp_reg);
-  } else {
-    __ Slt(TMP, A2, ZERO);      // if fromIndex < 0
-    __ Seleqz(A2, A2, TMP);     //     fromIndex = 0
   }
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize, pIndexOf).Int32Value());
   CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
-  __ Jalr(TMP);
+  __ Jalr(T9);
   __ Nop();
 
   if (slow_path != nullptr) {
@@ -1586,13 +1583,14 @@
   __ Beqzc(byte_array, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromBytes).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1619,24 +1617,23 @@
   //
   // all include a null check on `data` before calling that method.
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromChars).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-// java.lang.String.String(String original)
+// java.lang.StringFactory.newStringFromString(String toCopy)
 void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
                                                             kIntrinsified);
   InvokeRuntimeCallingConvention calling_convention;
   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
   Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
   locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
 }
@@ -1651,13 +1648,14 @@
   __ Beqzc(string_to_copy, slow_path->GetEntryLabel());
 
   __ LoadFromOffset(kLoadDoubleword,
-                    TMP,
+                    T9,
                     TR,
                     QUICK_ENTRYPOINT_OFFSET(kMips64DoublewordSize,
                                             pAllocStringFromString).Int32Value());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
-  __ Jalr(TMP);
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
+  __ Jalr(T9);
   __ Nop();
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1735,9 +1733,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(MIPS64)
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 048590e..95fdb9b 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1546,6 +1546,7 @@
   __ j(kEqual, slow_path->GetEntryLabel());
 
   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1571,6 +1572,7 @@
   //
   // all include a null check on `data` before calling that method.
   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
@@ -1594,6 +1596,7 @@
   __ j(kEqual, slow_path->GetEntryLabel());
 
   __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -2643,9 +2646,6 @@
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(X86, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(X86)
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 35e13a6..9e568f7 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1641,6 +1641,7 @@
 
   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
                                   /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1667,6 +1668,7 @@
   // all include a null check on `data` before calling that method.
   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
                                   /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
@@ -1691,6 +1693,7 @@
 
   __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
                                   /* no_rip */ true));
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -2721,9 +2724,6 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeLoadFence)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeStoreFence)
-UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeFullFence)
 
 UNREACHABLE_INTRINSICS(X86_64)
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 77ded29..98766a3 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2181,7 +2181,9 @@
     DCHECK(upper_bound_rti.IsSupertypeOf(rti))
         << " upper_bound_rti: " << upper_bound_rti
         << " rti: " << rti;
-    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact());
+    DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact())
+        << " upper_bound_rti: " << upper_bound_rti
+        << " rti: " << rti;
   }
 }
 
@@ -2215,6 +2217,10 @@
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
     DCHECK(IsValidHandle(type_handle));
+    if (!is_exact) {
+      DCHECK(!type_handle->CannotBeAssignedFromOtherTypes())
+          << "Callers of ReferenceTypeInfo::Create should ensure is_exact is properly computed";
+    }
   }
   return ReferenceTypeInfo(type_handle, is_exact);
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b684cc6..1bb5f5d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,6 +26,7 @@
 #include "base/arena_object.h"
 #include "base/stl_util.h"
 #include "dex/compiler_enums.h"
+#include "dex_instruction-inl.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "handle.h"
 #include "handle_scope.h"
@@ -508,6 +509,8 @@
   // before cursor.
   HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
 
+  ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; }
+
  private:
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -1264,6 +1267,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M)                         \
+  M(BitwiseNegatedRight, Instruction)                                   \
   M(MultiplyAccumulate, Instruction)
 #endif
 
@@ -1278,7 +1282,6 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64BitwiseNegatedRight, Instruction)                              \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
   M(Arm64IntermediateAddress, Instruction)
 #endif
@@ -2963,6 +2966,8 @@
   virtual IfCondition GetOppositeCondition() const = 0;
 
   bool IsGtBias() const { return GetBias() == ComparisonBias::kGtBias; }
+  bool IsLtBias() const { return GetBias() == ComparisonBias::kLtBias; }
+
   ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
   void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
 
@@ -2973,13 +2978,23 @@
   bool IsFPConditionTrueIfNaN() const {
     DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondGT) || (if_cond == kCondGE)) : (if_cond == kCondNE);
+    if (if_cond == kCondNE) {
+      return true;
+    } else if (if_cond == kCondEQ) {
+      return false;
+    }
+    return ((if_cond == kCondGT) || (if_cond == kCondGE)) && IsGtBias();
   }
 
   bool IsFPConditionFalseIfNaN() const {
     DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType();
     IfCondition if_cond = GetCondition();
-    return IsGtBias() ? ((if_cond == kCondLT) || (if_cond == kCondLE)) : (if_cond == kCondEQ);
+    if (if_cond == kCondEQ) {
+      return true;
+    } else if (if_cond == kCondNE) {
+      return false;
+    }
+    return ((if_cond == kCondLT) || (if_cond == kCondLE)) && IsGtBias();
   }
 
  protected:
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 75a71e7..173852a 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,66 +118,6 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
-class HArm64BitwiseNegatedRight : public HBinaryOperation {
- public:
-  HArm64BitwiseNegatedRight(Primitive::Type result_type,
-                            InstructionKind op,
-                            HInstruction* left,
-                            HInstruction* right,
-                            uint32_t dex_pc = kNoDexPc)
-    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
-      op_kind_(op) {
-    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
-  }
-
-  template <typename T, typename U>
-  auto Compute(T x, U y) const -> decltype(x & ~y) {
-    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
-                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
-                  "Inconsistent negated bitwise types");
-    switch (op_kind_) {
-      case HInstruction::kAnd:
-        return x & ~y;
-      case HInstruction::kOr:
-        return x | ~y;
-      case HInstruction::kXor:
-        return x ^ ~y;
-      default:
-        LOG(FATAL) << "Unreachable";
-        UNREACHABLE();
-    }
-  }
-
-  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetIntConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
-    return GetBlock()->GetGraph()->GetLongConstant(
-        Compute(x->GetValue(), y->GetValue()), GetDexPc());
-  }
-  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
-                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << DebugName() << " is not defined for float values";
-    UNREACHABLE();
-  }
-  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
-                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
-    LOG(FATAL) << DebugName() << " is not defined for double values";
-    UNREACHABLE();
-  }
-
-  InstructionKind GetOpKind() const { return op_kind_; }
-
-  DECLARE_INSTRUCTION(Arm64BitwiseNegatedRight);
-
- private:
-  // Specifies the bitwise operation, which will be then negated.
-  const InstructionKind op_kind_;
-
-  DISALLOW_COPY_AND_ASSIGN(HArm64BitwiseNegatedRight);
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index b04b622..c10c718 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -53,6 +53,66 @@
   DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate);
 };
 
+class HBitwiseNegatedRight : public HBinaryOperation {  // Computes `left OP ~right`.
+ public:
+  HBitwiseNegatedRight(Primitive::Type result_type,
+                            InstructionKind op,
+                            HInstruction* left,
+                            HInstruction* right,
+                            uint32_t dex_pc = kNoDexPc)
+    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
+      op_kind_(op) {
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
+  }
+
+  template <typename T, typename U>
+  auto Compute(T x, U y) const -> decltype(x & ~y) {  // Applies op_kind_ to x and ~y.
+    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
+                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
+                  "Inconsistent negated bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & ~y;
+      case HInstruction::kOr:
+        return x | ~y;
+      case HInstruction::kXor:
+        return x ^ ~y;
+      default:  // Not expected: the constructor DCHECKs that op is kAnd, kOr or kXor.
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {  // Folds two ints.
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {  // Folds two longs.
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(BitwiseNegatedRight);
+
+ private:
+  // Bitwise operation (kAnd, kOr or kXor) applied to the left input and the negated right input.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_SHARED_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c1b4d24..7a82063 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,7 +20,7 @@
 #include <memory>
 #include <stdint.h>
 
-#ifdef ART_ENABLE_CODEGEN_arm64
+#ifdef ART_ENABLE_CODEGEN_arm
 #include "dex_cache_array_fixups_arm.h"
 #endif
 
@@ -431,6 +431,7 @@
 
 static void RunArchOptimizations(InstructionSet instruction_set,
                                  HGraph* graph,
+                                 CodeGenerator* codegen,
                                  OptimizingCompilerStats* stats,
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
@@ -466,7 +467,8 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, stats);
+      x86::PcRelativeFixups* pc_relative_fixups =
+          new (arena) x86::PcRelativeFixups(graph, codegen, stats);
       HOptimization* x86_optimizations[] = {
           pc_relative_fixups
       };
@@ -561,7 +563,7 @@
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
-  RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
+  RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, stats, pass_observer);
   AllocateRegisters(graph, codegen, pass_observer);
 }
 
@@ -913,34 +915,31 @@
     return false;
   }
 
-  if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) {
+  const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+  if (compiler_options.GetGenerateDebugInfo()) {
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
-    CompiledMethod compiled_method(
-        GetCompilerDriver(),
-        codegen->GetInstructionSet(),
-        ArrayRef<const uint8_t>(code_allocator.GetMemory()),
-        codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
-        codegen->GetCoreSpillMask(),
-        codegen->GetFpuSpillMask(),
-        ArrayRef<const SrcMapElem>(),
-        ArrayRef<const uint8_t>(),  // mapping_table.
-        ArrayRef<const uint8_t>(stack_map_data, stack_map_size),
-        ArrayRef<const uint8_t>(),  // native_gc_map.
-        ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
-        ArrayRef<const LinkerPatch>());
-    debug::MethodDebugInfo method_debug_info {
-        dex_file,
-        class_def_idx,
-        method_idx,
-        access_flags,
-        code_item,
-        false,  // deduped.
-        code_address,
-        code_address + code_allocator.GetSize(),
-        &compiled_method
-    };
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForMethod(method_debug_info);
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
+    info.dex_file = dex_file;
+    info.class_def_index = class_def_idx;
+    info.dex_method_index = method_idx;
+    info.access_flags = access_flags;
+    info.code_item = code_item;
+    info.isa = codegen->GetInstructionSet();
+    info.deduped = false;
+    info.is_native_debuggable = compiler_options.GetNativeDebuggable();
+    info.is_optimized = true;
+    info.is_code_address_text_relative = false;
+    info.code_address = code_address;
+    info.code_size = code_allocator.GetSize();
+    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+    info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
+    info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
+    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+        GetCompilerDriver()->GetInstructionSet(),
+        GetCompilerDriver()->GetInstructionSetFeatures(),
+        ArrayRef<const debug::MethodDebugInfo>(&info, 1));
     CreateJITCodeEntryForAddress(code_address,
                                  std::unique_ptr<const uint8_t[]>(elf_file.data()),
                                  elf_file.size());
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a6f1461..d281a9f 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -16,6 +16,7 @@
 
 #include "pc_relative_fixups_x86.h"
 #include "code_generator_x86.h"
+#include "intrinsics_x86.h"
 
 namespace art {
 namespace x86 {
@@ -25,7 +26,10 @@
  */
 class PCRelativeHandlerVisitor : public HGraphVisitor {
  public:
-  explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+  PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
+      : HGraphVisitor(graph),
+        codegen_(down_cast<CodeGeneratorX86*>(codegen)),
+        base_(nullptr) {}
 
   void MoveBaseIfNeeded() {
     if (base_ != nullptr) {
@@ -146,7 +150,6 @@
     if (base_ != nullptr) {
       return;
     }
-
     // Insert the base at the start of the entry block, move it to a better
     // position later in MoveBaseIfNeeded().
     base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
@@ -180,7 +183,9 @@
     }
 
     bool base_added = false;
-    if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
+    if (invoke_static_or_direct != nullptr &&
+        invoke_static_or_direct->HasPcRelativeDexCache() &&
+        !WillHaveCallFreeIntrinsicsCodeGen(invoke)) {
       InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
       invoke_static_or_direct->AddSpecialInput(base_);
@@ -215,6 +220,24 @@
     }
   }
 
+  bool WillHaveCallFreeIntrinsicsCodeGen(HInvoke* invoke) {
+    if (invoke->GetIntrinsic() != Intrinsics::kNone) {
+      // The invoke is recognized as an intrinsic, but that alone does not guarantee
+      // call-free code: the intrinsic may be unimplemented, may bail out on the
+      // instruction set features, or may call on a slow path. To find out, run the
+      // locations builder on the invoke, inspect the result, and then clear the
+      // locations again. NOTE: this assumes that creating locations is the only
+      // side effect of TryDispatch().
+      IntrinsicLocationsBuilderX86 builder(codegen_);
+      bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
+      invoke->SetLocations(nullptr);
+      return success;
+    }
+    return false;  // Not an intrinsic.
+  }
+
+  CodeGeneratorX86* codegen_;
+
   // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
   // input to the HX86LoadFromConstantTable instructions.
   HX86ComputeBaseMethodAddress* base_;
@@ -226,7 +249,7 @@
     // that can be live-in at the irreducible loop header.
     return;
   }
-  PCRelativeHandlerVisitor visitor(graph_);
+  PCRelativeHandlerVisitor visitor(graph_, codegen_);
   visitor.VisitInsertionOrder();
   visitor.MoveBaseIfNeeded();
 }
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index af708ac..03de2fc 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -21,14 +21,21 @@
 #include "optimization.h"
 
 namespace art {
+
+class CodeGenerator;
+
 namespace x86 {
 
 class PcRelativeFixups : public HOptimization {
  public:
-  PcRelativeFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "pc_relative_fixups_x86", stats) {}
+  PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "pc_relative_fixups_x86", stats),
+        codegen_(codegen) {}
 
   void Run() OVERRIDE;
+
+ private:
+  CodeGenerator* codegen_;
 };
 
 }  // namespace x86
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index deaa415..75356c8 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -432,11 +432,10 @@
   } else if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
     ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
-    is_exact = is_exact || klass->CannotBeAssignedFromOtherTypes();
+    is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
   } else {
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -518,8 +517,7 @@
     HUnresolvedInstanceFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -527,8 +525,7 @@
     HUnresolvedStaticFieldGet* instr) {
   // TODO: Use descriptor to get the actual type.
   if (instr->GetFieldType() == Primitive::kPrimNot) {
-    instr->SetReferenceTypeInfo(
-      ReferenceTypeInfo::Create(handle_cache_->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -724,12 +721,11 @@
   if (handle->IsObjectArrayClass()) {
     ReferenceTypeInfo::TypeHandle component_handle =
         handle_cache->NewHandle(handle->GetComponentType());
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(component_handle, /* is_exact */ false));
+    bool is_exact = component_handle->CannotBeAssignedFromOtherTypes();
+    instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(component_handle, is_exact));
   } else {
     // We don't know what the parent actually is, so we fallback to object.
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache->GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
   }
 }
 
@@ -811,8 +807,7 @@
   if (first_input_index_not_null == input_count) {
     // All inputs are NullConstants, set the type to object.
     // This may happen in the presence of inlining.
-    instr->SetReferenceTypeInfo(
-        ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact */ false));
+    instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti());
     return;
   }
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ea19059..ce4f38a 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -46,6 +46,7 @@
 #include "class_linker.h"
 #include "compiler.h"
 #include "compiler_callbacks.h"
+#include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
 #include "dex/pass_manager.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
@@ -1035,8 +1036,13 @@
         OatHeader::kDebuggableKey,
         compiler_options_->debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
     key_value_store_->Put(
-        OatHeader::kExtractOnlyKey,
-        compiler_options_->IsExtractOnly() ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+        OatHeader::kNativeDebuggableKey,
+        compiler_options_->native_debuggable_ ? OatHeader::kTrueValue : OatHeader::kFalseValue);
+    if (compiler_options_->IsExtractOnly()) {
+      key_value_store_->Put(OatHeader::kCompilationType, OatHeader::kExtractOnlyValue);
+    } else if (UseProfileGuidedCompilation()) {
+      key_value_store_->Put(OatHeader::kCompilationType, OatHeader::kProfileGuideCompiledValue);
+    }
   }
 
   // Parse the arguments from the command line. In case of an unrecognized option or impossible
@@ -1685,6 +1691,8 @@
         std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
         std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
+        oat_writer->AddMethodDebugInfos(debug::MakeTrampolineInfos(oat_writer->GetOatHeader()));
+
         // We need to mirror the layout of the ELF file in the compressed debug-info.
         // Therefore PrepareDebugInfo() relies on the SetLoadedSectionSizes() call further above.
         elf_writer->PrepareDebugInfo(oat_writer->GetMethodDebugInfo());
@@ -1891,13 +1899,6 @@
     return success;
   }
 
-  bool ShouldCompileBasedOnProfiles() const {
-    DCHECK(UseProfileGuidedCompilation());
-    // If we are given a profile, compile only if we have some data in it.
-    return (profile_compilation_info_ != nullptr) &&
-        (profile_compilation_info_->GetNumberOfMethods() != 0);
-  }
-
  private:
   template <typename T>
   static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) {
@@ -2105,8 +2106,10 @@
     elf_writers_.reserve(oat_files_.size());
     oat_writers_.reserve(oat_files_.size());
     for (const std::unique_ptr<File>& oat_file : oat_files_) {
-      elf_writers_.emplace_back(
-          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file.get()));
+      elf_writers_.emplace_back(CreateElfWriterQuick(instruction_set_,
+                                                     instruction_set_features_.get(),
+                                                     compiler_options_.get(),
+                                                     oat_file.get()));
       elf_writers_.back()->Start();
       oat_writers_.emplace_back(new OatWriter(IsBootImage(), timings_));
     }
@@ -2593,16 +2596,11 @@
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   dex2oat->ParseArgs(argc, argv);
 
-  // Process profile information and assess if we need to do a profile guided compilation.
+  // If needed, process profile information for profile guided compilation.
   // This operation involves I/O.
   if (dex2oat->UseProfileGuidedCompilation()) {
-    if (dex2oat->LoadProfile()) {
-      if (!dex2oat->ShouldCompileBasedOnProfiles()) {
-        LOG(INFO) << "Skipped compilation because of insignificant profile delta";
-        return EXIT_SUCCESS;
-      }
-    } else {
-      LOG(WARNING) << "Failed to process profile files";
+    if (!dex2oat->LoadProfile()) {
+      LOG(ERROR) << "Failed to process profile file";
       return EXIT_FAILURE;
     }
   }
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index 039986c..bf563c7 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -89,7 +89,7 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For disassembler_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixld
+     LOCAL_SHARED_LIBRARIES += libvixl
   else
      LOCAL_SHARED_LIBRARIES += libvixl
   endif
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index c187536..3ed5766 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -32,6 +32,8 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
+#include "debug/elf_debug_writer.h"
+#include "debug/method_debug_info.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "disassembler.h"
@@ -98,6 +100,7 @@
   return ret;
 }
 
+template <typename ElfTypes>
 class OatSymbolizer FINAL {
  public:
   OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
@@ -105,29 +108,21 @@
       output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
   }
 
-  typedef void (OatSymbolizer::*Callback)(const DexFile::ClassDef&,
-                                          uint32_t,
-                                          const OatFile::OatMethod&,
-                                          const DexFile&,
-                                          uint32_t,
-                                          const DexFile::CodeItem*,
-                                          uint32_t);
-
   bool Symbolize() {
     const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
+    const InstructionSetFeatures* features = InstructionSetFeatures::FromBitmap(
+        isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap());
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
         MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
-    builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
+    builder_.reset(new ElfBuilder<ElfTypes>(isa, features, output_stream.get()));
 
     builder_->Start();
 
     auto* rodata = builder_->GetRoData();
     auto* text = builder_->GetText();
     auto* bss = builder_->GetBss();
-    auto* strtab = builder_->GetStrTab();
-    auto* symtab = builder_->GetSymTab();
 
     rodata->Start();
     const uint8_t* rodata_begin = oat_file_->Begin();
@@ -145,68 +140,38 @@
       bss->WriteNoBitsSection(oat_file_->BssSize());
     }
 
+    if (isa == kMips || isa == kMips64) {
+      builder_->WriteMIPSabiflagsSection();
+    }
     builder_->PrepareDynamicSection(
         elf_file->GetPath(), rodata_size, text_size, oat_file_->BssSize());
     builder_->WriteDynamicSection();
 
-    Walk(&art::OatSymbolizer::RegisterForDedup);
+    Walk();
+    for (const auto& trampoline : debug::MakeTrampolineInfos(oat_file_->GetOatHeader())) {
+      method_debug_infos_.push_back(trampoline);
+    }
 
-    NormalizeState();
-
-    strtab->Start();
-    strtab->Write("");  // strtab should start with empty string.
-    AddTrampolineSymbols();
-    Walk(&art::OatSymbolizer::AddSymbol);
-    strtab->End();
-
-    symtab->Start();
-    symtab->Write();
-    symtab->End();
+    debug::WriteDebugInfo(builder_.get(),
+                          ArrayRef<const debug::MethodDebugInfo>(method_debug_infos_),
+                          dwarf::DW_DEBUG_FRAME_FORMAT,
+                          true /* write_oat_patches */);
 
     builder_->End();
 
     return builder_->Good();
   }
 
-  void AddTrampolineSymbol(const char* name, uint32_t code_offset) {
-    if (code_offset != 0) {
-      uint32_t name_offset = builder_->GetStrTab()->Write(name);
-      uint64_t symbol_value = code_offset - oat_file_->GetOatHeader().GetExecutableOffset();
-      // Specifying 0 as the symbol size means that the symbol lasts until the next symbol or until
-      // the end of the section in case of the last symbol.
-      builder_->GetSymTab()->Add(name_offset, builder_->GetText(), symbol_value,
-          /* is_relative */ true, /* size */ 0, STB_GLOBAL, STT_FUNC);
-    }
-  }
-
-  void AddTrampolineSymbols() {
-    const OatHeader& oat_header = oat_file_->GetOatHeader();
-    AddTrampolineSymbol("interpreterToInterpreterBridge",
-                        oat_header.GetInterpreterToInterpreterBridgeOffset());
-    AddTrampolineSymbol("interpreterToCompiledCodeBridge",
-                        oat_header.GetInterpreterToCompiledCodeBridgeOffset());
-    AddTrampolineSymbol("jniDlsymLookup",
-                        oat_header.GetJniDlsymLookupOffset());
-    AddTrampolineSymbol("quickGenericJniTrampoline",
-                        oat_header.GetQuickGenericJniTrampolineOffset());
-    AddTrampolineSymbol("quickImtConflictTrampoline",
-                        oat_header.GetQuickImtConflictTrampolineOffset());
-    AddTrampolineSymbol("quickResolutionTrampoline",
-                        oat_header.GetQuickResolutionTrampolineOffset());
-    AddTrampolineSymbol("quickToInterpreterBridge",
-                        oat_header.GetQuickToInterpreterBridgeOffset());
-  }
-
-  void Walk(Callback callback) {
+  void Walk() {
     std::vector<const OatFile::OatDexFile*> oat_dex_files = oat_file_->GetOatDexFiles();
     for (size_t i = 0; i < oat_dex_files.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files[i];
       CHECK(oat_dex_file != nullptr);
-      WalkOatDexFile(oat_dex_file, callback);
+      WalkOatDexFile(oat_dex_file);
     }
   }
 
-  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file, Callback callback) {
+  void WalkOatDexFile(const OatFile::OatDexFile* oat_dex_file) {
     std::string error_msg;
     const DexFile* const dex_file = OpenDexFile(oat_dex_file, &error_msg);
     if (dex_file == nullptr) {
@@ -215,13 +180,12 @@
     for (size_t class_def_index = 0;
         class_def_index < dex_file->NumClassDefs();
         class_def_index++) {
-      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
       const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(class_def_index);
       OatClassType type = oat_class.GetType();
       switch (type) {
         case kOatClassAllCompiled:
         case kOatClassSomeCompiled:
-          WalkOatClass(oat_class, *dex_file, class_def, callback);
+          WalkOatClass(oat_class, *dex_file, class_def_index);
           break;
 
         case kOatClassNoneCompiled:
@@ -232,8 +196,10 @@
     }
   }
 
-  void WalkOatClass(const OatFile::OatClass& oat_class, const DexFile& dex_file,
-                    const DexFile::ClassDef& class_def, Callback callback) {
+  void WalkOatClass(const OatFile::OatClass& oat_class,
+                    const DexFile& dex_file,
+                    uint32_t class_def_index) {
+    const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
     const uint8_t* class_data = dex_file.GetClassData(class_def);
     if (class_data == nullptr) {  // empty class such as a marker interface?
       return;
@@ -241,111 +207,62 @@
     // Note: even if this is an interface or a native class, we still have to walk it, as there
     //       might be a static initializer.
     ClassDataItemIterator it(dex_file, class_data);
-    SkipAllFields(&it);
     uint32_t class_method_idx = 0;
-    while (it.HasNextDirectMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
-    }
-    while (it.HasNextVirtualMethod()) {
-      const OatFile::OatMethod oat_method = oat_class.GetOatMethod(class_method_idx);
-      WalkOatMethod(class_def, class_method_idx, oat_method, dex_file, it.GetMemberIndex(),
-                    it.GetMethodCodeItem(), it.GetMethodAccessFlags(), callback);
-      class_method_idx++;
-      it.Next();
+    for (; it.HasNextStaticField(); it.Next()) { /* skip */ }
+    for (; it.HasNextInstanceField(); it.Next()) { /* skip */ }
+    for (; it.HasNextDirectMethod() || it.HasNextVirtualMethod(); it.Next()) {
+      WalkOatMethod(oat_class.GetOatMethod(class_method_idx++),
+                    dex_file,
+                    class_def_index,
+                    it.GetMemberIndex(),
+                    it.GetMethodCodeItem(),
+                    it.GetMethodAccessFlags());
     }
     DCHECK(!it.HasNext());
   }
 
-  void WalkOatMethod(const DexFile::ClassDef& class_def, uint32_t class_method_index,
-                     const OatFile::OatMethod& oat_method, const DexFile& dex_file,
-                     uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
-                     uint32_t method_access_flags, Callback callback) {
+  void WalkOatMethod(const OatFile::OatMethod& oat_method,
+                     const DexFile& dex_file,
+                     uint32_t class_def_index,
+                     uint32_t dex_method_index,
+                     const DexFile::CodeItem* code_item,
+                     uint32_t method_access_flags) {
     if ((method_access_flags & kAccAbstract) != 0) {
       // Abstract method, no code.
       return;
     }
-    if (oat_method.GetCodeOffset() == 0) {
+    const OatHeader& oat_header = oat_file_->GetOatHeader();
+    const OatQuickMethodHeader* method_header = oat_method.GetOatQuickMethodHeader();
+    if (method_header == nullptr || method_header->GetCodeSize() == 0) {
       // No code.
       return;
     }
 
-    (this->*callback)(class_def, class_method_index, oat_method, dex_file, dex_method_idx, code_item,
-                      method_access_flags);
-  }
-
-  void RegisterForDedup(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                        uint32_t class_method_index ATTRIBUTE_UNUSED,
-                        const OatFile::OatMethod& oat_method,
-                        const DexFile& dex_file ATTRIBUTE_UNUSED,
-                        uint32_t dex_method_idx ATTRIBUTE_UNUSED,
-                        const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                        uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    state_[oat_method.GetCodeOffset()]++;
-  }
-
-  void NormalizeState() {
-    for (auto& x : state_) {
-      if (x.second == 1) {
-        state_[x.first] = 0;
-      }
-    }
-  }
-
-  enum class DedupState {  // private
-    kNotDeduplicated,
-    kDeduplicatedFirst,
-    kDeduplicatedOther
-  };
-  DedupState IsDuplicated(uint32_t offset) {
-    if (state_[offset] == 0) {
-      return DedupState::kNotDeduplicated;
-    }
-    if (state_[offset] == 1) {
-      return DedupState::kDeduplicatedOther;
-    }
-    state_[offset] = 1;
-    return DedupState::kDeduplicatedFirst;
-  }
-
-  void AddSymbol(const DexFile::ClassDef& class_def ATTRIBUTE_UNUSED,
-                 uint32_t class_method_index ATTRIBUTE_UNUSED,
-                 const OatFile::OatMethod& oat_method,
-                 const DexFile& dex_file,
-                 uint32_t dex_method_idx,
-                 const DexFile::CodeItem* code_item ATTRIBUTE_UNUSED,
-                 uint32_t method_access_flags ATTRIBUTE_UNUSED) {
-    DedupState dedup = IsDuplicated(oat_method.GetCodeOffset());
-    if (dedup != DedupState::kDeduplicatedOther) {
-      std::string pretty_name = PrettyMethod(dex_method_idx, dex_file, true);
-
-      if (dedup == DedupState::kDeduplicatedFirst) {
-        pretty_name = "[Dedup]" + pretty_name;
-      }
-
-      int name_offset = builder_->GetStrTab()->Write(pretty_name);
-      builder_->GetSymTab()->Add(name_offset, builder_->GetText(),
-          oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
-          true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
-    }
+    debug::MethodDebugInfo info = debug::MethodDebugInfo();
+    info.trampoline_name = nullptr;
+    info.dex_file = &dex_file;
+    info.class_def_index = class_def_index;
+    info.dex_method_index = dex_method_index;
+    info.access_flags = method_access_flags;
+    info.code_item = code_item;
+    info.isa = oat_header.GetInstructionSet();
+    info.deduped = !seen_offsets_.insert(oat_method.GetCodeOffset()).second;
+    info.is_native_debuggable = oat_header.IsNativeDebuggable();
+    info.is_optimized = method_header->IsOptimized();
+    info.is_code_address_text_relative = true;
+    info.code_address = oat_method.GetCodeOffset() - oat_header.GetExecutableOffset();
+    info.code_size = method_header->GetCodeSize();
+    info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
+    info.code_info = info.is_optimized ? method_header->GetOptimizedCodeInfoPtr() : nullptr;
+    info.cfi = ArrayRef<uint8_t>();
+    method_debug_infos_.push_back(info);
   }
 
  private:
-  static void SkipAllFields(ClassDataItemIterator* it) {
-    while (it->HasNextStaticField()) {
-      it->Next();
-    }
-    while (it->HasNextInstanceField()) {
-      it->Next();
-    }
-  }
-
   const OatFile* oat_file_;
-  std::unique_ptr<ElfBuilder<ElfTypes32> > builder_;
-  std::unordered_map<uint32_t, uint32_t> state_;
+  std::unique_ptr<ElfBuilder<ElfTypes> > builder_;
+  std::vector<debug::MethodDebugInfo> method_debug_infos_;
+  std::unordered_set<uint32_t> seen_offsets_;
   const std::string output_name_;
 };
 
@@ -2542,8 +2459,17 @@
     return EXIT_FAILURE;
   }
 
-  OatSymbolizer oat_symbolizer(oat_file, output_name);
-  if (!oat_symbolizer.Symbolize()) {
+  bool result;
+  // Try to produce an ELF file of the same type. This is finicky, as we have used 32-bit ELF
+  // files for 64-bit code in the past.
+  if (Is64BitInstructionSet(oat_file->GetOatHeader().GetInstructionSet())) {
+    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file, output_name);
+    result = oat_symbolizer.Symbolize();
+  } else {
+    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file, output_name);
+    result = oat_symbolizer.Symbolize();
+  }
+  if (!result) {
     fprintf(stderr, "Failed to symbolize\n");
     return EXIT_FAILURE;
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 500fa14..84660a3 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -501,7 +501,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,$(2)))
-    LOCAL_CLANG_arm64 := true
+    LOCAL_CLANG_ASFLAGS_arm += -no-integrated-as
     LOCAL_CFLAGS_$(DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
     LOCAL_CFLAGS_$(2ND_DEX2OAT_TARGET_ARCH) += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES)"
   else # host
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index cfcef49..64135d8 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -276,7 +276,7 @@
     bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
     add    sp, #16                       @ release out args
     .cfi_adjust_cfa_offset -16
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   @ TODO: we can clearly save an add here
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
     \return
 END \name
 .endm
@@ -812,14 +812,23 @@
 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
     .extern \entrypoint
 ENTRY \name
+    sub    sp, #12                    @ alignment padding
+    .cfi_adjust_cfa_offset 12
+    push   {r3}                       @ Save r3 as it is used as a temp register in the
+    .cfi_adjust_cfa_offset 4          @   expansion of the SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+    .cfi_rel_offset r3, 0             @   macro below, which clobbers its arguments.
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
+    ldr    r3, [sp, 32]               @ restore r3
+    .cfi_restore r3
+
     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
-    .pad #16
     .cfi_adjust_cfa_offset 16
     bl     \entrypoint
     add    sp, #16                    @ strip the extra frame
     .cfi_adjust_cfa_offset -16
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    add    sp, #16                    @ pop r3 + padding
+    .cfi_adjust_cfa_offset -16
     \return
 END \name
 .endm
@@ -943,85 +952,6 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
-ENTRY art_quick_alloc_object_tlab
-    // Fast path tlab allocation.
-    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
-    // r2, r3, r12: free.
-#if defined(USE_READ_BARRIER)
-    eor    r0, r0, r0                                         // Read barrier not supported here.
-    sub    r0, r0, #1                                         // Return -1.
-    bx     lr
-#endif
-    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
-                                                              // Load the class (r2)
-    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
-    cbz    r2, .Lart_quick_alloc_object_tlab_slow_path        // Check null class
-                                                              // Check class status.
-    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
-    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
-    bne    .Lart_quick_alloc_object_tlab_slow_path
-                                                              // Add a fake dependence from the
-                                                              // following access flag and size
-                                                              // loads to the status load.
-                                                              // This is to prevent those loads
-                                                              // from being reordered above the
-                                                              // status load and reading wrong
-                                                              // values (an alternative is to use
-                                                              // a load-acquire for the status).
-    eor    r3, r3, r3
-    add    r2, r2, r3
-                                                              // Check access flags has
-                                                              // kAccClassIsFinalizable.
-    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
-    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
-    bne    .Lart_quick_alloc_object_tlab_slow_path
-                                                              // Load thread_local_pos (r12) and
-                                                              // thread_local_end (r3) with ldrd.
-                                                              // Check constraints for ldrd.
-#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
-#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
-#endif
-    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
-    sub    r12, r3, r12                                       // Compute the remaining buf size.
-    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
-    cmp    r3, r12                                            // Check if it fits. OK to do this
-                                                              // before rounding up the object size
-                                                              // assuming the buf size alignment.
-    bhi    .Lart_quick_alloc_object_tlab_slow_path
-    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
-                                                              // Round up the object size by the
-                                                              // object alignment. (addr + 7) & ~7.
-    add    r3, r3, #OBJECT_ALIGNMENT_MASK
-    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
-                                                              // Reload old thread_local_pos (r0)
-                                                              // for the return value.
-    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
-    add    r1, r0, r3
-    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
-    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
-    add    r1, r1, #1
-    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF r2
-    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
-                                                              // Fence. This is "ish" not "ishst" so
-                                                              // that the code after this allocation
-                                                              // site will see the right values in
-                                                              // the fields of the class.
-                                                              // Alternatively we could use "ishst"
-                                                              // if we use load-acquire for the
-                                                              // class status load.)
-    dmb    ish
-    bx     lr
-.Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
-    mov    r2, r9                                             // Pass Thread::Current.
-    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_tlab
-
-
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -1125,6 +1055,127 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free.
+// r0 and r1 must be preserved for the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    r2, \slowPathLabel                                 // Check null class
+                                                              // Check class status.
+    ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
+    cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
+    bne    \slowPathLabel
+                                                              // Add a fake dependence from the
+                                                              // following access flag and size
+                                                              // loads to the status load.
+                                                              // This is to prevent those loads
+                                                              // from being reordered above the
+                                                              // status load and reading wrong
+                                                              // values (an alternative is to use
+                                                              // a load-acquire for the status).
+    eor    r3, r3, r3
+    add    r2, r2, r3
+                                                              // Check access flags has
+                                                              // kAccClassIsFinalizable.
+    ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+    tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
+    bne    \slowPathLabel
+                                                              // Load thread_local_pos (r12) and
+                                                              // thread_local_end (r3) with ldrd.
+                                                              // Check constraints for ldrd.
+#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
+#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
+#endif
+    ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
+    sub    r12, r3, r12                                       // Compute the remaining buf size.
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
+    cmp    r3, r12                                            // Check if it fits. OK to do this
+                                                              // before rounding up the object size
+                                                              // assuming the buf size alignment.
+    bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
+                                                              // Round up the object size by the
+                                                              // object alignment. (addr + 7) & ~7.
+    add    r3, r3, #OBJECT_ALIGNMENT_MASK
+    and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
+                                                              // Reload old thread_local_pos (r0)
+                                                              // for the return value.
+    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
+    add    r1, r0, r3
+    str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
+    ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
+    add    r1, r1, #1
+    str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
+    POISON_HEAP_REF r2
+    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+                                                              // Fence. This is "ish" not "ishst" so
+                                                              // that the code after this allocation
+                                                              // site will see the right values in
+                                                              // the fields of the class.
+                                                              // Alternatively we could use "ishst"
+                                                              // if we use load-acquire for the
+                                                              // class status load.
+    dmb    ish
+    bx     lr
+.endm
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
+#if defined(USE_READ_BARRIER)
+    eor    r0, r0, r0                                         // Read barrier not supported here.
+    sub    r0, r0, #1                                         // Return -1.
+    bx     lr
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_tlab
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+ENTRY art_quick_alloc_object_region_tlab
+    // Fast path tlab allocation.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free.
+#if !defined(USE_READ_BARRIER)
+    eor    r0, r0, r0                                         // Read barrier must be enabled here.
+    sub    r0, r0, #1                                         // Return -1.
+    bx     lr
+#endif
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+                                                              // Read barrier for class load.
+    ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    push   {r0, r1, r3, lr}                                   // Save registers. r3 is pushed only
+                                                              // to align sp by 16 bytes.
+    mov    r0, r2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    r2, r0                                             // Get the (marked) class back.
+    pop    {r0, r1, r3, lr}
+    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+END art_quick_alloc_object_region_tlab
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e848008..e4c2558 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1537,7 +1537,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -1638,6 +1638,9 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Called by managed code when the thread has been asked to suspend.
      */
diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S
index 51e224c..801f708 100644
--- a/runtime/arch/mips/asm_support_mips.S
+++ b/runtime/arch/mips/asm_support_mips.S
@@ -129,4 +129,43 @@
 #endif  // USE_HEAP_POISONING
 .endm
 
+// Select the minimum integer: dreg = (creg != 0) ? rreg : sreg, where creg = (rreg < sreg).
+// The original value of creg may be lost by the end of the macro.
+.macro MINint dreg,rreg,sreg,creg
+  .set push
+  .set noat
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+  .ifc \dreg, \rreg
+  selnez \dreg, \rreg, \creg
+  seleqz \creg, \sreg, \creg
+  .else
+  seleqz \dreg, \sreg, \creg
+  selnez \creg, \rreg, \creg
+  .endif
+  or     \dreg, \dreg, \creg
+#else
+  movn   \dreg, \rreg, \creg
+  movz   \dreg, \sreg, \creg
+#endif
+  .set pop
+.endm
+
+// Find minimum of two signed registers
+.macro MINs dreg,rreg,sreg
+  .set push
+  .set noat
+  slt    $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
+// Find minimum of two unsigned registers
+.macro MINu dreg,rreg,sreg
+  .set push
+  .set noat
+  sltu   $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
 #endif  // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_S_
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 699ab3e..dbf0abb 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1313,7 +1313,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
 
@@ -1416,11 +1416,14 @@
 
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
     jal   artAllocObjectFromCodeRosAlloc
-    move  $a2 ,$s1                                                # Pass self as argument.
+    move  $a2, $s1                                                # Pass self as argument.
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
@@ -1744,5 +1747,74 @@
     nop
 END art_quick_ushr_long
 
-UNIMPLEMENTED art_quick_indexof
-UNIMPLEMENTED art_quick_string_compareto
+/* java.lang.String.indexOf(int ch, int fromIndex=0) */
+ENTRY_NO_GP art_quick_indexof
+/* $a0 holds address of "this" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
+  lw    $t0, MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
+  slt   $at, $a2, $zero # if fromIndex < 0
+#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+  seleqz $a2, $a2, $at  #     fromIndex = 0;
+#else
+  movn   $a2, $zero, $at #    fromIndex = 0;
+#endif
+  subu  $t0, $t0, $a2   # this.length() - fromIndex
+  blez  $t0, 6f         # if this.length()-fromIndex <= 0
+  li    $v0, -1         #     return -1;
+
+  sll   $v0, $a2, 1     # $a0 += $a2 * 2
+  addu  $a0, $a0, $v0   #  "  "   "  " "
+  move  $v0, $a2        # Set i to fromIndex.
+
+1:
+  lhu   $t3, MIRROR_STRING_VALUE_OFFSET($a0)    # if this.charAt(i) == ch
+  beq   $t3, $a1, 6f                            #     return i;
+  addu  $a0, $a0, 2     # point at this.charAt(i + 1)
+  subu  $t0, $t0, 1     # this.length() - i
+  bnez  $t0, 1b         # while this.length() - i > 0
+  addu  $v0, $v0, 1     # i++
+
+  li    $v0, -1         # if this.length() - i <= 0
+                        #     return -1;
+
+6:
+  j     $ra
+  nop
+END art_quick_indexof
+
+  .set push
+  .set noat
+/* java.lang.String.compareTo(String anotherString) */
+ENTRY_NO_GP art_quick_string_compareto
+/* $a0 holds address of "this" */
+/* $a1 holds address of "anotherString" */
+  beq    $a0, $a1, 9f   # this and anotherString are the same object
+  move   $v0, $zero
+
+  lw     $a2, MIRROR_STRING_COUNT_OFFSET($a0)   # this.length()
+  lw     $a3, MIRROR_STRING_COUNT_OFFSET($a1)   # anotherString.length()
+  MINu   $t2, $a2, $a3
+# $t2 now holds min(this.length(),anotherString.length())
+
+  beqz   $t2, 9f        # while min(this.length(),anotherString.length())-i != 0
+  subu   $v0, $a2, $a3  # if $t2==0 return
+                        #     (this.length() - anotherString.length())
+1:
+  lhu    $t0, MIRROR_STRING_VALUE_OFFSET($a0)   # while this.charAt(i) == anotherString.charAt(i)
+  lhu    $t1, MIRROR_STRING_VALUE_OFFSET($a1)
+  bne    $t0, $t1, 9f   # if this.charAt(i) != anotherString.charAt(i)
+  subu   $v0, $t0, $t1  #     return (this.charAt(i) - anotherString.charAt(i))
+  addiu  $a0, $a0, 2    # point at this.charAt(i++)
+  subu   $t2, $t2, 1    # new value of
+                        # min(this.length(),anotherString.length())-i
+  bnez   $t2, 1b
+  addiu  $a1, $a1, 2    # point at anotherString.charAt(i++)
+  subu   $v0, $a2, $a3
+
+9:
+  j      $ra
+  nop
+END art_quick_string_compareto
+
+  .set pop
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index b859c70..786e860 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -83,4 +83,38 @@
 #endif  // USE_HEAP_POISONING
 .endm
 
+// Select the minimum integer: dreg = (creg != 0) ? rreg : sreg, where creg = (rreg < sreg).
+// The original value of creg is lost by the end of the macro.
+.macro MINint dreg,rreg,sreg,creg
+  .set push
+  .set noat
+  .ifc \dreg, \rreg
+  selnez \dreg, \rreg, \creg
+  seleqz \creg, \sreg, \creg
+  .else
+  seleqz \dreg, \sreg, \creg
+  selnez \creg, \rreg, \creg
+  .endif
+  or     \dreg, \dreg, \creg
+  .set pop
+.endm
+
+// Find minimum of two signed registers
+.macro MINs dreg,rreg,sreg
+  .set push
+  .set noat
+  slt    $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
+// Find minimum of two unsigned registers
+.macro MINu dreg,rreg,sreg
+  .set push
+  .set noat
+  sltu   $at, \rreg, \sreg
+  MINint \dreg, \rreg, \sreg, $at
+  .set pop
+.endm
+
 #endif  // ART_RUNTIME_ARCH_MIPS64_ASM_SUPPORT_MIPS64_S_
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index d264c9b..f1e605a 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1367,7 +1367,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 ENTRY art_quick_alloc_object_rosalloc
 
@@ -1467,6 +1467,9 @@
 
 END art_quick_alloc_object_rosalloc
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
      * exception on error. On success the String is returned. A0 holds the string index. The fast
@@ -1725,10 +1728,8 @@
 
   lw     $a2,MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
   lw     $a3,MIRROR_STRING_COUNT_OFFSET($a1)    # anotherString.length()
-  sltu   $at,$a2,$a3
-  seleqz $t2,$a3,$at
-  selnez $at,$a2,$at
-  or     $t2,$t2,$at    # $t2 now holds min(this.length(),anotherString.length())
+  MINu   $t2, $a2, $a3
+# $t2 now holds min(this.length(),anotherString.length())
 
   beqz   $t2,9f         # while min(this.length(),anotherString.length())-i != 0
   subu   $v0,$a2,$a3    # if $t2==0 return
@@ -1753,16 +1754,18 @@
 /* java.lang.String.indexOf(int ch, int fromIndex=0) */
 ENTRY_NO_GP art_quick_indexof
 /* $a0 holds address of "this" */
-/* $a1 holds address of "ch" */
-/* $a2 holds address of "fromIndex" */
+/* $a1 holds "ch" */
+/* $a2 holds "fromIndex" */
   lw    $t0,MIRROR_STRING_COUNT_OFFSET($a0)     # this.length()
-  subu  $t0,$t0,$a2     # this.length() - offset
-  blez  $t0,6f          # if this.length()-offset <= 0
+  slt   $at, $a2, $zero # if fromIndex < 0
+  seleqz $a2, $a2, $at  #     fromIndex = 0;
+  subu  $t0,$t0,$a2     # this.length() - fromIndex
+  blez  $t0,6f          # if this.length()-fromIndex <= 0
   li    $v0,-1          #     return -1;
 
   sll   $v0,$a2,1       # $a0 += $a2 * 2
   daddu $a0,$a0,$v0     #  "  "   "  " "
-  move  $v0,$a2         # Set i to offset.
+  move  $v0,$a2         # Set i to fromIndex.
 
 1:
   lhu   $t3,MIRROR_STRING_VALUE_OFFSET($a0)     # if this.charAt(i) == ch
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fbacdbc..290769b 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -219,7 +219,8 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index d5807e2..4236c28 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1205,7 +1205,7 @@
 
 TEST_F(StubTest, StringCompareTo) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
-    (defined(__mips__) && defined(__LP64__)) || (defined(__x86_64__) && !defined(__APPLE__))
+    defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -2054,7 +2054,7 @@
 }
 
 TEST_F(StubTest, StringIndexOf) {
-#if defined(__arm__) || defined(__aarch64__) || (defined(__mips__) && defined(__LP64__))
+#if defined(__arm__) || defined(__aarch64__) || defined(__mips__)
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 77b8e87..3e47209 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -142,6 +142,10 @@
     CFI_RESTORE(REG_VAR(reg))
 END_MACRO
 
+MACRO1(CFI_RESTORE_REG, reg)
+    CFI_RESTORE(REG_VAR(reg))
+END_MACRO
+
 #define UNREACHABLE int3
 
 MACRO1(UNIMPLEMENTED,name)
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index fbee5d7..125570d 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -686,7 +686,15 @@
 
 MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
     DEFINE_FUNCTION VAR(c_name)
+    subl MACRO_LITERAL(12), %esp                 // alignment padding
+    CFI_ADJUST_CFA_OFFSET(12)
+    PUSH ebx                                     // Save ebx as the expansion of the
+                                                 //   SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
+                                                 //   macro below clobbers it.
     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  ebx, ebx  // save ref containing registers for GC
+    movl 28(%esp), %ebx                          // restore ebx
+    CFI_RESTORE_REG ebx
+
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp                 // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
@@ -700,6 +708,8 @@
     addl MACRO_LITERAL(32), %esp                 // pop arguments
     CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    addl MACRO_LITERAL(16), %esp                 // pop ebx + padding
+    CFI_ADJUST_CFA_OFFSET(-16)
     CALL_MACRO(return_macro)                     // return or deliver exception
     END_FUNCTION VAR(c_name)
 END_MACRO
@@ -887,8 +897,8 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
 END_FUNCTION art_quick_alloc_object_rosalloc
 
-
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 69caec8..dee8d3c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,6 +809,7 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
 DEFINE_FUNCTION art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
@@ -943,6 +944,8 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_FUNCTION art_quick_alloc_object_tlab
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 879364e..d5f0dff 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -101,6 +101,11 @@
 ADD_TEST_EQ(THREAD_ID_OFFSET,
             art::Thread::ThinLockIdOffset<__SIZEOF_POINTER__>().Int32Value())
 
+// Offset of field Thread::tls32_.is_gc_marking.
+#define THREAD_IS_GC_MARKING_OFFSET 52
+ADD_TEST_EQ(THREAD_IS_GC_MARKING_OFFSET,
+            art::Thread::IsGcMarkingOffset<__SIZEOF_POINTER__>().Int32Value())
+
 // Offset of field Thread::tlsPtr_.card_table.
 #define THREAD_CARD_TABLE_OFFSET 128
 ADD_TEST_EQ(THREAD_CARD_TABLE_OFFSET,
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 4672948..e4097dd 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -234,21 +234,34 @@
   return ReadFullyGeneric<pread>(fd_, buffer, byte_count, offset);
 }
 
-bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
+template <bool kUseOffset>
+bool FdFile::WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset) {
   DCHECK(!read_only_mode_);
-  const char* ptr = static_cast<const char*>(buffer);
   moveTo(GuardState::kBase, GuardState::kClosed, "Writing into closed file.");
+  DCHECK(kUseOffset || offset == 0u);
+  const char* ptr = static_cast<const char*>(buffer);
   while (byte_count > 0) {
-    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
+    ssize_t bytes_written = kUseOffset
+        ? TEMP_FAILURE_RETRY(pwrite(fd_, ptr, byte_count, offset))
+        : TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
     if (bytes_written == -1) {
       return false;
     }
     byte_count -= bytes_written;  // Reduce the number of remaining bytes.
     ptr += bytes_written;  // Move the buffer forward.
+    offset += static_cast<size_t>(bytes_written);
   }
   return true;
 }
 
+bool FdFile::PwriteFully(const void* buffer, size_t byte_count, size_t offset) {
+  return WriteFullyGeneric<true>(buffer, byte_count, offset);
+}
+
+bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
+  return WriteFullyGeneric<false>(buffer, byte_count, 0u);
+}
+
 bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
   DCHECK(!read_only_mode_);
   off_t off = static_cast<off_t>(offset);
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 8040afe..16cd44f 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -79,6 +79,7 @@
   bool ReadFully(void* buffer, size_t byte_count) WARN_UNUSED;
   bool PreadFully(void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
   bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
+  bool PwriteFully(const void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
 
   // Copy data from another file.
   bool Copy(FdFile* input_file, int64_t offset, int64_t size);
@@ -119,6 +120,9 @@
   GuardState guard_state_;
 
  private:
+  template <bool kUseOffset>
+  bool WriteFullyGeneric(const void* buffer, size_t byte_count, size_t offset);
+
   int fd_;
   std::string file_path_;
   bool auto_close_;
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index ecf607c..9bc87e5 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -110,6 +110,34 @@
   ASSERT_EQ(file.Close(), 0);
 }
 
+TEST_F(FdFileTest, ReadWriteFullyWithOffset) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  FdFile file;
+  ASSERT_TRUE(file.Open(tmp.GetFilename(), O_RDWR));
+  EXPECT_GE(file.Fd(), 0);
+  EXPECT_TRUE(file.IsOpened());
+
+  const char* test_string = "This is a test string";
+  size_t length = strlen(test_string) + 1;
+  const size_t offset = 12;
+  std::unique_ptr<char[]> offset_read_string(new char[length]);
+  std::unique_ptr<char[]> read_string(new char[length]);
+
+  // Write scratch data to file that we can read back into.
+  EXPECT_TRUE(file.PwriteFully(test_string, length, offset));
+  ASSERT_EQ(file.Flush(), 0);
+
+  // Test reading at both offsets.
+  EXPECT_TRUE(file.PreadFully(&offset_read_string[0], length, offset));
+  EXPECT_STREQ(test_string, &offset_read_string[0]);
+
+  EXPECT_TRUE(file.PreadFully(&read_string[0], length, 0u));
+  EXPECT_NE(memcmp(&read_string[0], test_string, length), 0);
+
+  ASSERT_EQ(file.Close(), 0);
+}
+
 TEST_F(FdFileTest, Copy) {
   art::ScratchFile src_tmp;
   FdFile src;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 01d140a..d51a1f7 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -7714,7 +7714,10 @@
       }
       ++num_resolved;
       DCHECK(!klass->IsProxyClass());
-      DCHECK(klass->IsResolved());
+      if (!klass->IsResolved()) {
+        DCHECK(klass->IsErroneous());
+        continue;
+      }
       mirror::DexCache* klass_dex_cache = klass->GetDexCache();
       if (klass_dex_cache == dex_cache) {
         const size_t class_def_idx = klass->GetDexClassDefIndex();
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 3df9101..729957f 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -406,6 +406,7 @@
   int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
   ASSERT_EQ(0, rmdir_cache_result);
   TearDownAndroidData(android_data_, true);
+  dalvik_cache_.clear();
 
   // icu4c has a fixed 10-element array "gCommonICUDataArray".
   // If we run > 10 tests, we fill that array and u_setCommonData fails.
diff --git a/runtime/elf.h b/runtime/elf.h
index d1efc92..63b18c5 100644
--- a/runtime/elf.h
+++ b/runtime/elf.h
@@ -1284,6 +1284,7 @@
 
   SHT_MIPS_REGINFO        = 0x70000006, // Register usage information
   SHT_MIPS_OPTIONS        = 0x7000000d, // General options
+  SHT_MIPS_ABIFLAGS       = 0x7000002a, // Abiflags options
 
   SHT_HIPROC        = 0x7fffffff, // Highest processor arch-specific type.
   SHT_LOUSER        = 0x80000000, // Lowest type reserved for applications.
@@ -1606,7 +1607,8 @@
   // MIPS program header types.
   PT_MIPS_REGINFO  = 0x70000000,  // Register usage information.
   PT_MIPS_RTPROC   = 0x70000001,  // Runtime procedure table.
-  PT_MIPS_OPTIONS  = 0x70000002   // Options segment.
+  PT_MIPS_OPTIONS  = 0x70000002,  // Options segment.
+  PT_MIPS_ABIFLAGS = 0x70000003   // Abiflags segment.
 };
 
 // Segment flag bits.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index faa3d3b..2e5b599 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -59,6 +59,8 @@
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -2668,6 +2670,12 @@
     // permanantly disabled. b/17942071
     concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   }
+
+  if ((gc_type == collector::kGcTypeFull) && runtime->UseJit()) {
+    // It's time to clear all inline caches, in case some classes can be unloaded.
+    runtime->GetJit()->GetCodeCache()->ClearGcRootsInInlineCaches(self);
+  }
+
   CHECK(collector != nullptr)
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index a4e5587..9ecd391 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -1535,50 +1535,31 @@
   //              images[0] is          f/c/d/e.art
   // ----------------------------------------------
   //              images[1] is          g/h/i/j.art  -> /a/b/h/i/j.art
-
-  // Derive pattern.
-  std::vector<std::string> left;
-  Split(input_image_file_name, '/', &left);
-  std::vector<std::string> right;
-  Split(images[0], '/', &right);
-
-  size_t common = 1;
-  while (common < left.size() && common < right.size()) {
-    if (left[left.size() - common - 1] != right[right.size() - common - 1]) {
-      break;
-    }
-    common++;
+  const std::string& first_image = images[0];
+  // Length of common suffix.
+  size_t common = 0;
+  while (common < input_image_file_name.size() &&
+         common < first_image.size() &&
+         *(input_image_file_name.end() - common - 1) == *(first_image.end() - common - 1)) {
+    ++common;
   }
-
-  std::vector<std::string> prefix_vector(left.begin(), left.end() - common);
-  std::string common_prefix = Join(prefix_vector, '/');
-  if (!common_prefix.empty() && common_prefix[0] != '/' && input_image_file_name[0] == '/') {
-    common_prefix = "/" + common_prefix;
-  }
+  // We want to replace the prefix of each boot class path image with the prefix of the input
+  // image. This handles the case where the image file name contains @ separators.
+  // Example: input_image_file_name is oats/system@framework@boot.art and
+  // images[0] is .../arm/boot.art.
+  // Then the new image name prefix will be oats/system@framework@
+  // so that the other images are openable.
+  const size_t old_prefix_length = first_image.size() - common;
+  const std::string new_prefix = input_image_file_name.substr(
+      0,
+      input_image_file_name.size() - common);
 
   // Apply pattern to images[1] .. images[n].
   for (size_t i = 1; i < images.size(); ++i) {
-    std::string image = images[i];
-
-    size_t rslash = std::string::npos;
-    for (size_t j = 0; j < common; ++j) {
-      if (rslash != std::string::npos) {
-        rslash--;
-      }
-
-      rslash = image.rfind('/', rslash);
-      if (rslash == std::string::npos) {
-        rslash = 0;
-      }
-      if (rslash == 0) {
-        break;
-      }
-    }
-    std::string image_part = image.substr(rslash);
-
-    std::string new_image = common_prefix + (StartsWith(image_part, "/") ? "" : "/") +
-        image_part;
-    image_file_names->push_back(new_image);
+    const std::string& image = images[i];
+    CHECK_GT(image.length(), old_prefix_length);
+    std::string suffix = image.substr(old_prefix_length);
+    image_file_names->push_back(new_prefix + suffix);
   }
 }
 
diff --git a/runtime/interpreter/mterp/arm/binopLit8.S b/runtime/interpreter/mterp/arm/binopLit8.S
index ec0b3c4..b8f0d92 100644
--- a/runtime/interpreter/mterp/arm/binopLit8.S
+++ b/runtime/interpreter/mterp/arm/binopLit8.S
@@ -13,7 +13,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
diff --git a/runtime/interpreter/mterp/arm/binopWide.S b/runtime/interpreter/mterp/arm/binopWide.S
index 1d511ec..4d88001 100644
--- a/runtime/interpreter/mterp/arm/binopWide.S
+++ b/runtime/interpreter/mterp/arm/binopWide.S
@@ -19,9 +19,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if $chkzero
diff --git a/runtime/interpreter/mterp/arm/binopWide2addr.S b/runtime/interpreter/mterp/arm/binopWide2addr.S
index 81db48b..bb16335 100644
--- a/runtime/interpreter/mterp/arm/binopWide2addr.S
+++ b/runtime/interpreter/mterp/arm/binopWide2addr.S
@@ -16,8 +16,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if $chkzero
diff --git a/runtime/interpreter/mterp/arm/entry.S b/runtime/interpreter/mterp/arm/entry.S
index 4c5ffc5..981c036 100644
--- a/runtime/interpreter/mterp/arm/entry.S
+++ b/runtime/interpreter/mterp/arm/entry.S
@@ -47,8 +47,8 @@
     /* set up "named" registers */
     mov     rSELF, r0
     ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to insns[] (i.e. - the dalivk byte code).
-    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
     add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
diff --git a/runtime/interpreter/mterp/arm/fbinop2addr.S b/runtime/interpreter/mterp/arm/fbinop2addr.S
index b052a29..53c87a0 100644
--- a/runtime/interpreter/mterp/arm/fbinop2addr.S
+++ b/runtime/interpreter/mterp/arm/fbinop2addr.S
@@ -7,14 +7,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     $instr                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
diff --git a/runtime/interpreter/mterp/arm/fbinopWide2addr.S b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
index 4e7401d..9766e2c 100644
--- a/runtime/interpreter/mterp/arm/fbinopWide2addr.S
+++ b/runtime/interpreter/mterp/arm/fbinopWide2addr.S
@@ -8,11 +8,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
diff --git a/runtime/interpreter/mterp/arm/funop.S b/runtime/interpreter/mterp/arm/funop.S
index d7a0859..1b8bb8b 100644
--- a/runtime/interpreter/mterp/arm/funop.S
+++ b/runtime/interpreter/mterp/arm/funop.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
diff --git a/runtime/interpreter/mterp/arm/funopNarrower.S b/runtime/interpreter/mterp/arm/funopNarrower.S
index 9daec28..b9f758b 100644
--- a/runtime/interpreter/mterp/arm/funopNarrower.S
+++ b/runtime/interpreter/mterp/arm/funopNarrower.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
diff --git a/runtime/interpreter/mterp/arm/funopWider.S b/runtime/interpreter/mterp/arm/funopWider.S
index 450ba3a..854cdc9 100644
--- a/runtime/interpreter/mterp/arm/funopWider.S
+++ b/runtime/interpreter/mterp/arm/funopWider.S
@@ -6,11 +6,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     $instr                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_aget_wide.S b/runtime/interpreter/mterp/arm/op_aget_wide.S
index e1430b4..853a7a4 100644
--- a/runtime/interpreter/mterp/arm/op_aget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_aget_wide.S
@@ -19,7 +19,7 @@
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_aput_wide.S b/runtime/interpreter/mterp/arm/op_aput_wide.S
index 49839d1..0057507 100644
--- a/runtime/interpreter/mterp/arm/op_aput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_aput_wide.S
@@ -15,7 +15,7 @@
     ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
     add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
     cmp     r1, r3                      @ compare unsigned index, length
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
diff --git a/runtime/interpreter/mterp/arm/op_cmp_long.S b/runtime/interpreter/mterp/arm/op_cmp_long.S
index 2b4c0ea..e57b19c 100644
--- a/runtime/interpreter/mterp/arm/op_cmp_long.S
+++ b/runtime/interpreter/mterp/arm/op_cmp_long.S
@@ -23,8 +23,8 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     cmp     r1, r3                      @ compare (vBB+1, vCC+1)
diff --git a/runtime/interpreter/mterp/arm/op_const.S b/runtime/interpreter/mterp/arm/op_const.S
index de3e3c3..39890a0 100644
--- a/runtime/interpreter/mterp/arm/op_const.S
+++ b/runtime/interpreter/mterp/arm/op_const.S
@@ -1,7 +1,7 @@
     /* const vAA, #+BBBBbbbb */
     mov     r3, rINST, lsr #8           @ r3<- AA
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r1, 2                         @ r1<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
     GET_INST_OPCODE ip                  @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_const_16.S b/runtime/interpreter/mterp/arm/op_const_16.S
index 59c6dac..a30cf3a 100644
--- a/runtime/interpreter/mterp/arm/op_const_16.S
+++ b/runtime/interpreter/mterp/arm/op_const_16.S
@@ -1,5 +1,5 @@
     /* const/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     SET_VREG r0, r3                     @ vAA<- r0
diff --git a/runtime/interpreter/mterp/arm/op_const_4.S b/runtime/interpreter/mterp/arm/op_const_4.S
index c177bb9..c97b0e9 100644
--- a/runtime/interpreter/mterp/arm/op_const_4.S
+++ b/runtime/interpreter/mterp/arm/op_const_4.S
@@ -1,8 +1,7 @@
     /* const/4 vA, #+B */
-    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
     ubfx    r0, rINST, #8, #4           @ r0<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  @ ip<- opcode from rINST
     SET_VREG r1, r0                     @ fp[A]<- r1
     GOTO_OPCODE ip                      @ execute next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_high16.S b/runtime/interpreter/mterp/arm/op_const_high16.S
index 460d546..536276d 100644
--- a/runtime/interpreter/mterp/arm/op_const_high16.S
+++ b/runtime/interpreter/mterp/arm/op_const_high16.S
@@ -1,5 +1,5 @@
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r0, r0, lsl #16             @ r0<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
index 1a3d0b2..1255c07 100644
--- a/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/arm/op_const_string_jumbo.S
@@ -1,7 +1,7 @@
     /* const/string vAA, String@BBBBBBBB */
     EXPORT_PC
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r2, 2                         @ r2<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
     mov     r1, rINST, lsr #8           @ r1<- AA
     orr     r0, r0, r2, lsl #16         @ r1<- BBBBbbbb
     add     r2, rFP, #OFF_FP_SHADOWFRAME
diff --git a/runtime/interpreter/mterp/arm/op_const_wide.S b/runtime/interpreter/mterp/arm/op_const_wide.S
index 12394b6..8310a4c 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide.S
@@ -8,7 +8,7 @@
     orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
     CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_16.S b/runtime/interpreter/mterp/arm/op_const_wide_16.S
index 3811d86..28abb51 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_16.S
@@ -1,10 +1,10 @@
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r1, r0, asr #31             @ r1<- ssssssss
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_32.S b/runtime/interpreter/mterp/arm/op_const_wide_32.S
index 0b6f1cc..c10bb04 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_32.S
@@ -5,7 +5,7 @@
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_const_wide_high16.S b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
index b9796eb..d7e38ec 100644
--- a/runtime/interpreter/mterp/arm/op_const_wide_high16.S
+++ b/runtime/interpreter/mterp/arm/op_const_wide_high16.S
@@ -5,7 +5,7 @@
     mov     r1, r1, lsl #16             @ r1<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide.S b/runtime/interpreter/mterp/arm/op_iget_wide.S
index 859ffac..e287d51 100644
--- a/runtime/interpreter/mterp/arm/op_iget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_iget_wide.S
@@ -16,7 +16,7 @@
     cmp      r3, #0
     bne      MterpException                @ bail out
     CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
-    add      r3, rFP, r2, lsl #2           @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
     stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
     ADVANCE 2
     GET_INST_OPCODE ip                     @ extract opcode from rINST
diff --git a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
index 07f854a..5a7177d 100644
--- a/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iget_wide_quick.S
@@ -7,7 +7,7 @@
     beq     common_errNullObject        @ object was null
     ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_instance_of.S b/runtime/interpreter/mterp/arm/op_instance_of.S
index d76f0b0..019929e 100644
--- a/runtime/interpreter/mterp/arm/op_instance_of.S
+++ b/runtime/interpreter/mterp/arm/op_instance_of.S
@@ -11,10 +11,9 @@
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
-    mov       r9, rINST, lsr #8         @ r9<- A+
-    and       r9, r9, #15               @ r9<- A
     bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
     bne       MterpException
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide.S b/runtime/interpreter/mterp/arm/op_iput_wide.S
index 8bbd63e..3dda187 100644
--- a/runtime/interpreter/mterp/arm/op_iput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_iput_wide.S
@@ -5,7 +5,7 @@
     mov      r1, rINST, lsr #12         @ r1<- B
     GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
     ubfx     r2, rINST, #8, #4          @ r2<- A
-    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
     ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
diff --git a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
index a2fc9e1..88e6ea1 100644
--- a/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/arm/op_iput_wide_quick.S
@@ -5,7 +5,7 @@
     ubfx    r0, rINST, #8, #4           @ r0<- A
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
     ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
diff --git a/runtime/interpreter/mterp/arm/op_long_to_double.S b/runtime/interpreter/mterp/arm/op_long_to_double.S
index 1d48a2a..cac12d4 100644
--- a/runtime/interpreter/mterp/arm/op_long_to_double.S
+++ b/runtime/interpreter/mterp/arm/op_long_to_double.S
@@ -8,8 +8,8 @@
      */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     vldr    d0, [r3]                    @ d0<- vAA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
 
diff --git a/runtime/interpreter/mterp/arm/op_move_result_wide.S b/runtime/interpreter/mterp/arm/op_move_result_wide.S
index 1845ccf..87929ea 100644
--- a/runtime/interpreter/mterp/arm/op_move_result_wide.S
+++ b/runtime/interpreter/mterp/arm/op_move_result_wide.S
@@ -1,7 +1,7 @@
     /* move-result-wide vAA */
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_move_wide.S b/runtime/interpreter/mterp/arm/op_move_wide.S
index f5d156d..ff353ea 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_16.S b/runtime/interpreter/mterp/arm/op_move_wide_16.S
index 8a55c4b..9812b66 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide_16.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide_16.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 2                         @ r3<- BBBB
     FETCH r2, 1                         @ r2<- AAAA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     lr, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ lr<- &fp[AAAA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
diff --git a/runtime/interpreter/mterp/arm/op_move_wide_from16.S b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
index b65259d..d2cc60c 100644
--- a/runtime/interpreter/mterp/arm/op_move_wide_from16.S
+++ b/runtime/interpreter/mterp/arm/op_move_wide_from16.S
@@ -2,8 +2,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 1                         @ r3<- BBBB
     mov     rINST, rINST, lsr #8        @ rINST<- AA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_mul_long.S b/runtime/interpreter/mterp/arm/op_mul_long.S
index 9e83778..8f40f19 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long.S
@@ -20,8 +20,8 @@
     FETCH r0, 1                         @ r0<- CCBB
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -29,7 +29,7 @@
     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
diff --git a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
index 789dbd3..7ef24c5 100644
--- a/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_mul_long_2addr.S
@@ -9,8 +9,8 @@
     /* mul-long/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @  ip<- ZxW
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
index cfab530..ceae878 100644
--- a/runtime/interpreter/mterp/arm/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -9,6 +9,6 @@
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
index 3a50908..4f2f89d 100644
--- a/runtime/interpreter/mterp/arm/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -12,7 +12,7 @@
     bl    artGet64StaticFromCode
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
-    add   lr, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
     cmp   r3, #0                        @ Fail to resolve?
     bne   MterpException                @ bail out
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/op_shl_long.S b/runtime/interpreter/mterp/arm/op_shl_long.S
index 12ea248..82ec6ed 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
diff --git a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
index 4799e77..f361a7d 100644
--- a/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shl_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
diff --git a/runtime/interpreter/mterp/arm/op_shr_long.S b/runtime/interpreter/mterp/arm/op_shr_long.S
index 88a13d6..a0afe5b 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
diff --git a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
index 78d8bb7..976110e 100644
--- a/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_shr_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
index adbcffa..8d8ed8c 100644
--- a/runtime/interpreter/mterp/arm/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -8,7 +8,7 @@
     FETCH   r0, 1                       @ r0<- field ref BBBB
     ldr     r1, [rFP, #OFF_FP_METHOD]
     mov     r2, rINST, lsr #8           @ r3<- AA
-    add     r2, rFP, r2, lsl #2
+    VREG_INDEX_TO_ADDR r2, r2
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long.S b/runtime/interpreter/mterp/arm/op_ushr_long.S
index f98ec63..c817bc9 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long.S
@@ -9,12 +9,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
diff --git a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
index 840283d..2735f87 100644
--- a/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/arm/op_ushr_long_2addr.S
@@ -7,7 +7,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
diff --git a/runtime/interpreter/mterp/arm/unopNarrower.S b/runtime/interpreter/mterp/arm/unopNarrower.S
index a5fc027..2d0453a 100644
--- a/runtime/interpreter/mterp/arm/unopNarrower.S
+++ b/runtime/interpreter/mterp/arm/unopNarrower.S
@@ -12,7 +12,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     $preinstr                           @ optional op; may set condition codes
diff --git a/runtime/interpreter/mterp/arm/unopWide.S b/runtime/interpreter/mterp/arm/unopWide.S
index a074234..cd5defd 100644
--- a/runtime/interpreter/mterp/arm/unopWide.S
+++ b/runtime/interpreter/mterp/arm/unopWide.S
@@ -9,8 +9,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm/unopWider.S b/runtime/interpreter/mterp/arm/unopWider.S
index 23b6b9d..9d50489 100644
--- a/runtime/interpreter/mterp/arm/unopWider.S
+++ b/runtime/interpreter/mterp/arm/unopWider.S
@@ -10,7 +10,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     $preinstr                           @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/arm64/entry.S b/runtime/interpreter/mterp/arm64/entry.S
index f9073ab..23e656e 100644
--- a/runtime/interpreter/mterp/arm64/entry.S
+++ b/runtime/interpreter/mterp/arm64/entry.S
@@ -46,7 +46,7 @@
     /* set up "named" registers */
     mov     xSELF, x0
     ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to insns[] (i.e. - the dalivk byte code).
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S
index 7223750..7101ba9 100644
--- a/runtime/interpreter/mterp/arm64/header.S
+++ b/runtime/interpreter/mterp/arm64/header.S
@@ -272,7 +272,7 @@
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
-    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
 .endm
 
 /*
diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide.S b/runtime/interpreter/mterp/arm64/op_iput_wide.S
index 4ce9525..e1ab127 100644
--- a/runtime/interpreter/mterp/arm64/op_iput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_iput_wide.S
@@ -5,7 +5,7 @@
     lsr      w1, wINST, #12             // w1<- B
     GET_VREG w1, w1                     // w1<- fp[B], the object pointer
     ubfx     w2, wINST, #8, #4          // w2<- A
-    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    VREG_INDEX_TO_ADDR x2, x2           // w2<- &fp[A]
     ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
index 1d034ec..a79b1a6 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -8,7 +8,7 @@
     FETCH   w0, 1                       // w0<- field ref BBBB
     ldr     x1, [xFP, #OFF_FP_METHOD]
     lsr     w2, wINST, #8               // w3<- AA
-    add     x2, xFP, w2, lsl #2
+    VREG_INDEX_TO_ADDR x2, w2
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
diff --git a/runtime/interpreter/mterp/mips/entry.S b/runtime/interpreter/mterp/mips/entry.S
index cef08fe..5771a4f 100644
--- a/runtime/interpreter/mterp/mips/entry.S
+++ b/runtime/interpreter/mterp/mips/entry.S
@@ -49,7 +49,7 @@
     /* set up "named" registers */
     move    rSELF, a0
     lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
-    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to insns[] (i.e. - the dalivk byte code).
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index b26a63a..092474d 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -343,8 +343,8 @@
     /* set up "named" registers */
     mov     rSELF, r0
     ldr     r0, [r2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to insns[] (i.e. - the dalivk byte code).
-    add     rREFS, rFP, r0, lsl #2                 @ point to reference array in shadow frame
+    add     rFP, r2, #SHADOWFRAME_VREGS_OFFSET     @ point to vregs.
+    VREG_INDEX_TO_ADDR rREFS, r0                   @ point to reference array in shadow frame
     ldr     r0, [r2, #SHADOWFRAME_DEX_PC_OFFSET]   @ Get starting dex_pc.
     add     rPC, r1, #CODEITEM_INSNS_OFFSET        @ Point to base of insns[]
     add     rPC, rPC, r0, lsl #1                   @ Create direct pointer to 1st dex opcode
@@ -435,8 +435,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[B]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -452,8 +452,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 1                         @ r3<- BBBB
     mov     rINST, rINST, lsr #8        @ rINST<- AA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -469,8 +469,8 @@
     /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
     FETCH r3, 2                         @ r3<- BBBB
     FETCH r2, 1                         @ r2<- AAAA
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BBBB]
-    add     lr, rFP, r2, lsl #2         @ r2<- &fp[AAAA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BBBB]
+    VREG_INDEX_TO_ADDR lr, r2           @ lr<- &fp[AAAA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- fp[BBBB]
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r2, r3, ip        @ Zero out the shadow regs
@@ -563,7 +563,7 @@
     /* move-result-wide vAA */
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     ldr     r3, [rFP, #OFF_FP_RESULT_REGISTER]
-    add     r2, rFP, rINST, lsl #2      @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, rINST        @ r2<- &fp[AA]
     ldmia   r3, {r0-r1}                 @ r0/r1<- retval.j
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero out the shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -655,7 +655,7 @@
     ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
     ldmia   r2, {r0-r1}                 @ r0/r1 <- vAA/vAA+1
     b       MterpReturn
 
@@ -687,10 +687,9 @@
 .L_op_const_4: /* 0x12 */
 /* File: arm/op_const_4.S */
     /* const/4 vA, #+B */
-    mov     r1, rINST, lsl #16          @ r1<- Bxxx0000
+    sbfx    r1, rINST, #12, #4          @ r1<- sssssssB (sign-extended)
     ubfx    r0, rINST, #8, #4           @ r0<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    mov     r1, r1, asr #28             @ r1<- sssssssB (sign-extended)
     GET_INST_OPCODE ip                  @ ip<- opcode from rINST
     SET_VREG r1, r0                     @ fp[A]<- r1
     GOTO_OPCODE ip                      @ execute next instruction
@@ -700,7 +699,7 @@
 .L_op_const_16: /* 0x13 */
 /* File: arm/op_const_16.S */
     /* const/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     SET_VREG r0, r3                     @ vAA<- r0
@@ -713,8 +712,8 @@
 /* File: arm/op_const.S */
     /* const vAA, #+BBBBbbbb */
     mov     r3, rINST, lsr #8           @ r3<- AA
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r1, 2                         @ r1<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r1, 2                         @ r1<- BBBB (high)
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r1, lsl #16         @ r0<- BBBBbbbb
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -726,7 +725,7 @@
 .L_op_const_high16: /* 0x15 */
 /* File: arm/op_const_high16.S */
     /* const/high16 vAA, #+BBBB0000 */
-    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended
+    FETCH r0, 1                         @ r0<- 0000BBBB (zero-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r0, r0, lsl #16             @ r0<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -739,12 +738,12 @@
 .L_op_const_wide_16: /* 0x16 */
 /* File: arm/op_const_wide_16.S */
     /* const-wide/16 vAA, #+BBBB */
-    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended
+    FETCH_S r0, 1                       @ r0<- ssssBBBB (sign-extended)
     mov     r3, rINST, lsr #8           @ r3<- AA
     mov     r1, r0, asr #31             @ r1<- ssssssss
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -760,7 +759,7 @@
     FETCH_ADVANCE_INST 3                @ advance rPC, load rINST
     orr     r0, r0, r2, lsl #16         @ r0<- BBBBbbbb
     CLEAR_SHADOW_PAIR r3, r2, lr        @ Zero out the shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     mov     r1, r0, asr #31             @ r1<- ssssssss
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
@@ -780,7 +779,7 @@
     orr     r1, r2, r3, lsl #16         @ r1<- HHHHhhhh (high word)
     CLEAR_SHADOW_PAIR r9, r2, r3        @ Zero out the shadow regs
     FETCH_ADVANCE_INST 5                @ advance rPC, load rINST
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -796,7 +795,7 @@
     mov     r1, r1, lsl #16             @ r1<- BBBB0000
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     CLEAR_SHADOW_PAIR r3, r0, r2        @ Zero shadow regs
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[AA]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ vAA<- r0/r1
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -825,8 +824,8 @@
 /* File: arm/op_const_string_jumbo.S */
     /* const/string vAA, String@BBBBBBBB */
     EXPORT_PC
-    FETCH r0, 1                         @ r0<- bbbb (low
-    FETCH r2, 2                         @ r2<- BBBB (high
+    FETCH r0, 1                         @ r0<- bbbb (low)
+    FETCH r2, 2                         @ r2<- BBBB (high)
     mov     r1, rINST, lsr #8           @ r1<- AA
     orr     r0, r0, r2, lsl #16         @ r1<- BBBBbbbb
     add     r2, rFP, #OFF_FP_SHADOWFRAME
@@ -938,10 +937,9 @@
     VREG_INDEX_TO_ADDR r1, r1           @ r1<- &object
     ldr       r2, [rFP, #OFF_FP_METHOD] @ r2<- method
     mov       r3, rSELF                 @ r3<- self
-    mov       r9, rINST, lsr #8         @ r9<- A+
-    and       r9, r9, #15               @ r9<- A
     bl        MterpInstanceOf           @ (index, &obj, method, self)
     ldr       r1, [rSELF, #THREAD_EXCEPTION_OFFSET]
+    ubfx      r9, rINST, #8, #4         @ r9<- A
     PREFETCH_INST 2
     cmp       r1, #0                    @ exception pending?
     bne       MterpException
@@ -1509,8 +1507,8 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     cmp     r1, r3                      @ compare (vBB+1, vCC+1)
@@ -2089,7 +2087,7 @@
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldrd    r2, [r0, #MIRROR_WIDE_ARRAY_DATA_OFFSET]  @ r2/r3<- vBB[vCC]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r9, {r2-r3}                 @ vAA/vAA+1<- r2/r3
     GOTO_OPCODE ip                      @ jump to next instruction
@@ -2314,7 +2312,7 @@
     ldr     r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]    @ r3<- arrayObj->length
     add     r0, r0, r1, lsl #3          @ r0<- arrayObj + index*width
     cmp     r1, r3                      @ compare unsigned index, length
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     bcs     common_errArrayIndex        @ index >= length, bail
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     ldmia   r9, {r2-r3}                 @ r2/r3<- vAA/vAA+1
@@ -2533,7 +2531,7 @@
     cmp      r3, #0
     bne      MterpException                @ bail out
     CLEAR_SHADOW_PAIR r2, ip, lr           @ Zero out the shadow regs
-    add      r3, rFP, r2, lsl #2           @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2              @ r3<- &fp[A]
     stmia    r3, {r0-r1}                   @ fp[A]<- r0/r1
     ADVANCE 2
     GET_INST_OPCODE ip                     @ extract opcode from rINST
@@ -2736,7 +2734,7 @@
     mov      r1, rINST, lsr #12         @ r1<- B
     GET_VREG r1, r1                     @ r1<- fp[B], the object pointer
     ubfx     r2, rINST, #8, #4          @ r2<- A
-    add      r2, rFP, r2, lsl #2        @ r2<- &fp[A]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[A]
     ldr      r3, [rFP, #OFF_FP_METHOD]  @ r3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
@@ -2923,7 +2921,7 @@
     bl    artGet64StaticFromCode
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
-    add   lr, rFP, r9, lsl #2           @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
     cmp   r3, #0                        @ Fail to resolve?
     bne   MterpException                @ bail out
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
@@ -3135,7 +3133,7 @@
     FETCH   r0, 1                       @ r0<- field ref BBBB
     ldr     r1, [rFP, #OFF_FP_METHOD]
     mov     r2, rINST, lsr #8           @ r3<- AA
-    add     r2, rFP, r2, lsl #2
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
@@ -3668,8 +3666,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3696,8 +3694,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3750,8 +3748,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3779,7 +3777,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
                                @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3803,11 +3801,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     fsitos  s1, s0                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -3828,11 +3825,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     fsitod  d0, s0                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -3880,7 +3876,7 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vB/vB+1
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
                                @ optional op; may set condition codes
@@ -3904,8 +3900,8 @@
      */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     vldr    d0, [r3]                    @ d0<- vAA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
 
@@ -3935,11 +3931,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     ftosizs s1, s0                              @ s1<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -3964,7 +3959,7 @@
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
     GET_VREG r0, r3                     @ r0<- vB
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
                                @ optional op; may set condition codes
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -3989,11 +3984,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     flds    s0, [r3]                    @ s0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     vcvt.f64.f32  d0, s0                              @ d0<- op
     CLEAR_SHADOW_PAIR r9, ip, lr        @ Zero shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
@@ -4015,11 +4009,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     ftosizd  s0, d0                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -4043,8 +4036,8 @@
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r3, {r0-r1}                 @ r0/r1<- vAA
     CLEAR_SHADOW_PAIR rINST, ip, lr     @ Zero shadow regs
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
@@ -4070,11 +4063,10 @@
      */
     /* unop vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
     fldd    d0, [r3]                    @ d0<- vB
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
-    and     r9, r9, #15                 @ r9<- A
     vcvt.f32.f64  s0, d0                              @ s0<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
@@ -4626,9 +4618,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4670,9 +4662,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4715,8 +4707,8 @@
     FETCH r0, 1                         @ r0<- CCBB
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -4724,7 +4716,7 @@
     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
     mov     r0, rINST, lsr #8           @ r0<- AA
     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[AA]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[AA]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r0, {r9-r10}                @ vAA/vAA+1<- r9/r10
@@ -4755,9 +4747,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 1
@@ -4800,9 +4792,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 1
@@ -4844,9 +4836,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4888,9 +4880,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4932,9 +4924,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -4966,12 +4958,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r1, r1, asl r2              @ r1<- r1 << r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 << (32-r2))
@@ -4998,12 +4990,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -5030,12 +5022,12 @@
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r3, r0, #255                @ r3<- BB
     mov     r0, r0, lsr #8              @ r0<- CC
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
     GET_VREG r2, r0                     @ r2<- vCC
     ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
     and     r2, r2, #63                 @ r0<- r0 & 0x3f
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
     rsb     r3, r2, #32                 @ r3<- 32 - r2
     orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
@@ -5355,9 +5347,9 @@
     mov     rINST, rINST, lsr #8        @ rINST<- AA
     and     r2, r0, #255                @ r2<- BB
     mov     r3, r0, lsr #8              @ r3<- CC
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[AA]
-    add     r2, rFP, r2, lsl #2         @ r2<- &fp[BB]
-    add     r3, rFP, r3, lsl #2         @ r3<- &fp[CC]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[AA]
+    VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[BB]
+    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[CC]
     ldmia   r2, {r0-r1}                 @ r0/r1<- vBB/vBB+1
     ldmia   r3, {r2-r3}                 @ r2/r3<- vCC/vCC+1
     .if 0
@@ -5808,8 +5800,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -5848,8 +5840,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -5881,8 +5873,8 @@
     /* mul-long/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    r9, rINST, #8, #4           @ r9<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     rINST, rFP, r9, lsl #2      @ rINST<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR rINST, r9        @ rINST<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   rINST, {r0-r1}              @ r0/r1<- vAA/vAA+1
     mul     ip, r2, r1                  @  ip<- ZxW
@@ -5917,8 +5909,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 1
@@ -5958,8 +5950,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 1
@@ -5998,8 +5990,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6038,8 +6030,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6078,8 +6070,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6109,7 +6101,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r1, r1, asl r2              @ r1<- r1 << r2
@@ -6136,7 +6128,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
@@ -6163,7 +6155,7 @@
     ubfx    r9, rINST, #8, #4           @ r9<- A
     GET_VREG r2, r3                     @ r2<- vB
     CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs
-    add     r9, rFP, r9, lsl #2         @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[A]
     and     r2, r2, #63                 @ r2<- r2 & 0x3f
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     mov     r0, r0, lsr r2              @ r0<- r2 >> r2
@@ -6191,14 +6183,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fadds   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6219,14 +6209,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fsubs   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6247,14 +6235,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fmuls   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6275,14 +6261,12 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    flds    s1, [r3]                    @ s1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
+    flds    s1, [r3]                    @ s1<- vB
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     flds    s0, [r9]                    @ s0<- vA
-
     fdivs   s2, s0, s1                              @ s2<- op
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     fsts    s2, [r9]                    @ vAA<- s2
@@ -6343,11 +6327,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6372,11 +6355,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6401,11 +6383,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6430,11 +6411,10 @@
      */
     /* binop/2addr vA, vB */
     mov     r3, rINST, lsr #12          @ r3<- B
-    mov     r9, rINST, lsr #8           @ r9<- A+
+    ubfx    r9, rINST, #8, #4           @ r9<- A
     VREG_INDEX_TO_ADDR r3, r3           @ r3<- &vB
-    and     r9, r9, #15                 @ r9<- A
-    fldd    d1, [r3]                    @ d1<- vB
     CLEAR_SHADOW_PAIR r9, ip, r0        @ Zero out shadow regs
+    fldd    d1, [r3]                    @ d1<- vB
     VREG_INDEX_TO_ADDR r9, r9           @ r9<- &vA
     FETCH_ADVANCE_INST 1                @ advance rPC, load rINST
     fldd    d0, [r9]                    @ d0<- vA
@@ -6467,8 +6447,8 @@
     /* binop/2addr vA, vB */
     mov     r1, rINST, lsr #12          @ r1<- B
     ubfx    rINST, rINST, #8, #4        @ rINST<- A
-    add     r1, rFP, r1, lsl #2         @ r1<- &fp[B]
-    add     r9, rFP, rINST, lsl #2      @ r9<- &fp[A]
+    VREG_INDEX_TO_ADDR r1, r1           @ r1<- &fp[B]
+    VREG_INDEX_TO_ADDR r9, rINST        @ r9<- &fp[A]
     ldmia   r1, {r2-r3}                 @ r2/r3<- vBB/vBB+1
     ldmia   r9, {r0-r1}                 @ r0/r1<- vAA/vAA+1
     .if 0
@@ -6783,7 +6763,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6821,7 +6801,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6860,7 +6840,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -6967,7 +6947,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7005,7 +6985,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7043,7 +7023,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7081,7 +7061,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7119,7 +7099,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7157,7 +7137,7 @@
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
     /* binop/lit8 vAA, vBB, #+CC */
-    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC
+    FETCH_S r3, 1                       @ r3<- ssssCCBB (sign-extended for CC)
     mov     r9, rINST, lsr #8           @ r9<- AA
     and     r2, r3, #255                @ r2<- BB
     GET_VREG r0, r2                     @ r0<- vBB
@@ -7207,7 +7187,7 @@
     beq     common_errNullObject        @ object was null
     ldrd    r0, [r3, ip]                @ r0<- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
-    add     r3, rFP, r2, lsl #2         @ r3<- &fp[A]
+    VREG_INDEX_TO_ADDR r3, r2           @ r3<- &fp[A]
     CLEAR_SHADOW_PAIR r2, ip, lr        @ Zero out the shadow regs
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     stmia   r3, {r0-r1}                 @ fp[A]<- r0/r1
@@ -7263,7 +7243,7 @@
     ubfx    r0, rINST, #8, #4           @ r0<- A
     cmp     r2, #0                      @ check object for null
     beq     common_errNullObject        @ object was null
-    add     r0, rFP, r0, lsl #2         @ r0<- &fp[A]
+    VREG_INDEX_TO_ADDR r0, r0           @ r0<- &fp[A]
     ldmia   r0, {r0-r1}                 @ r0/r1<- fp[A]/fp[A+1]
     FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
     strd    r0, [r2, r3]                @ obj.field<- r0/r1
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index c7c0fb5..cdb27e8 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -279,7 +279,7 @@
  * Convert a virtual register index into an address.
  */
 .macro VREG_INDEX_TO_ADDR reg, vreg
-    add     \reg, xFP, \vreg, lsl #2   /* WARNING/FIXME: handle shadow frame vreg zero if store */
+    add     \reg, xFP, \vreg, lsl #2   /* WARNING: handle shadow frame vreg zero if store */
 .endm
 
 /*
@@ -338,7 +338,7 @@
     /* set up "named" registers */
     mov     xSELF, x0
     ldr     w0, [x2, #SHADOWFRAME_NUMBER_OF_VREGS_OFFSET]
-    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to insns[] (i.e. - the dalivk byte code).
+    add     xFP, x2, #SHADOWFRAME_VREGS_OFFSET     // point to vregs.
     add     xREFS, xFP, w0, lsl #2                 // point to reference array in shadow frame
     ldr     w0, [x2, #SHADOWFRAME_DEX_PC_OFFSET]   // Get starting dex_pc.
     add     xPC, x1, #CODEITEM_INSNS_OFFSET        // Point to base of insns[]
@@ -2552,7 +2552,7 @@
     lsr      w1, wINST, #12             // w1<- B
     GET_VREG w1, w1                     // w1<- fp[B], the object pointer
     ubfx     w2, wINST, #8, #4          // w2<- A
-    add      x2, xFP, x2, lsl #2        // w2<- &fp[A]
+    VREG_INDEX_TO_ADDR x2, x2           // w2<- &fp[A]
     ldr      x3, [xFP, #OFF_FP_METHOD]  // w3<- referrer
     PREFETCH_INST 2
     bl       artSet64InstanceFromMterp
@@ -2941,7 +2941,7 @@
     FETCH   w0, 1                       // w0<- field ref BBBB
     ldr     x1, [xFP, #OFF_FP_METHOD]
     lsr     w2, wINST, #8               // w3<- AA
-    add     x2, xFP, w2, lsl #2
+    VREG_INDEX_TO_ADDR x2, w2
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
     bl      artSet64IndirectStaticFromMterp
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 7ae1ab1..b134129 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -542,7 +542,7 @@
     /* set up "named" registers */
     move    rSELF, a0
     lw      a0, SHADOWFRAME_NUMBER_OF_VREGS_OFFSET(a2)
-    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to insns[] (i.e. - the dalivk byte code).
+    addu    rFP, a2, SHADOWFRAME_VREGS_OFFSET     # point to vregs.
     EAS2(rREFS, rFP, a0)                          # point to reference array in shadow frame
     lw      a0, SHADOWFRAME_DEX_PC_OFFSET(a2)     # Get starting dex_pc
     addu    rPC, a1, CODEITEM_INSNS_OFFSET        # Point to base of insns[]
@@ -4373,8 +4373,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4412,8 +4412,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4451,8 +4451,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4491,8 +4491,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4526,8 +4526,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4567,8 +4567,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4602,8 +4602,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4642,8 +4642,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4681,8 +4681,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4720,8 +4720,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4759,8 +4759,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4798,8 +4798,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
@@ -4837,8 +4837,8 @@
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
      * vCC (a1).  Useful for integer division and modulus.  Note that we
-     * *don't* check for (INT_MIN / -1) here, because the ARM math lib
-     * handles it correctly.
+     * *don't* check for (INT_MIN / -1) here, because the CPU handles it
+     * correctly.
      *
      * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
      *      xor-int, shl-int, shr-int, ushr-int
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index a3b99e3..5bd9a6b 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -41,20 +41,42 @@
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
+
   jit_options->code_cache_initial_capacity_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
   jit_options->code_cache_max_capacity_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
-  jit_options->compile_threshold_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
-  // TODO(ngeoffray): Make this a proper option.
-  jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
-  jit_options->warmup_threshold_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
   jit_options->save_profiling_info_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);;
+      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);
+
+  jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+  if (jit_options->compile_threshold_ > std::numeric_limits<uint16_t>::max()) {
+    LOG(FATAL) << "Method compilation threshold is above its internal limit.";
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITWarmupThreshold)) {
+    jit_options->warmup_threshold_ = *options.Get(RuntimeArgumentMap::JITWarmupThreshold);
+    if (jit_options->warmup_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      LOG(FATAL) << "Method warmup threshold is above its internal limit.";
+    }
+  } else {
+    jit_options->warmup_threshold_ = jit_options->compile_threshold_ / 2;
+  }
+
+  if (options.Exists(RuntimeArgumentMap::JITOsrThreshold)) {
+    jit_options->osr_threshold_ = *options.Get(RuntimeArgumentMap::JITOsrThreshold);
+    if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      LOG(FATAL) << "Method on stack replacement threshold is above its internal limit.";
+    }
+  } else {
+    jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
+    if (jit_options->osr_threshold_ > std::numeric_limits<uint16_t>::max()) {
+      jit_options->osr_threshold_ = std::numeric_limits<uint16_t>::max();
+    }
+  }
+
   return jit_options;
 }
 
@@ -188,9 +210,11 @@
 }
 
 void Jit::StartProfileSaver(const std::string& filename,
-                            const std::vector<std::string>& code_paths) {
+                            const std::vector<std::string>& code_paths,
+                            const std::string& foreign_dex_profile_path,
+                            const std::string& app_dir) {
   if (save_profiling_info_) {
-    ProfileSaver::Start(filename, code_cache_.get(), code_paths);
+    ProfileSaver::Start(filename, code_cache_.get(), code_paths, foreign_dex_profile_path, app_dir);
   }
 }
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 3f54192..d5c2134 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,8 +43,7 @@
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 500;
-  static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 10000;
 
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
@@ -70,7 +69,17 @@
     return instrumentation_cache_.get();
   }
 
-  void StartProfileSaver(const std::string& filename, const std::vector<std::string>& code_paths);
+  // Starts the profile saver if the config options allow profile recording.
+  // The profile will be stored in the specified `filename` and will contain
+  // information collected from the given `code_paths` (a set of dex locations).
+  // The `foreign_dex_profile_path` is the path where the saver will put the
+  // profile markers for loaded dex files which are not owned by the application.
+  // The `app_dir` is the application directory and is used to decide which
+  // dex files belong to the application.
+  void StartProfileSaver(const std::string& filename,
+                         const std::vector<std::string>& code_paths,
+                         const std::string& foreign_dex_profile_path,
+                         const std::string& app_dir);
   void StopProfileSaver();
 
   void DumpForSigQuit(std::ostream& os) {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 0b0f926..af47da6 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -297,6 +297,15 @@
   }
 }
 
+void JitCodeCache::ClearGcRootsInInlineCaches(Thread* self) {
+  MutexLock mu(self, lock_);
+  for (ProfilingInfo* info : profiling_infos_) {
+    if (!info->IsInUseByCompiler()) {
+      info->ClearGcRootsInInlineCaches();
+    }
+  }
+}
+
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           const uint8_t* mapping_table,
@@ -679,7 +688,7 @@
       // Also remove the saved entry point from the ProfilingInfo objects.
       for (ProfilingInfo* info : profiling_infos_) {
         const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
-        if (!ContainsPc(ptr) && !info->IsMethodBeingCompiled()) {
+        if (!ContainsPc(ptr) && !info->IsInUseByCompiler()) {
           info->GetMethod()->SetProfilingInfo(nullptr);
         }
         info->SetSavedEntryPoint(nullptr);
@@ -731,7 +740,7 @@
         // code cache collection.
         if (ContainsPc(ptr) && info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
           // We clear the inline caches as classes in it might be stalled.
-          info->ClearInlineCaches();
+          info->ClearGcRootsInInlineCaches();
           // Do a fence to make sure the clearing is seen before attaching to the method.
           QuasiAtomic::ThreadFenceRelease();
           info->GetMethod()->SetProfilingInfo(info);
@@ -919,6 +928,22 @@
   return true;
 }
 
+ProfilingInfo* JitCodeCache::NotifyCompilerUse(ArtMethod* method, Thread* self) {
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    info->IncrementInlineUse();
+  }
+  return info;
+}
+
+void JitCodeCache::DoneCompilerUse(ArtMethod* method, Thread* self) {
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info != nullptr);
+  info->DecrementInlineUse();
+}
+
 void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
   ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
   DCHECK(info->IsMethodBeingCompiled());
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 0bd4f7d..98dd70d 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -71,10 +71,22 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
+  // Notify to the code cache that the compiler wants to use the
+  // profiling info of `method` to drive optimizations,
+  // and therefore ensure the returned profiling info object is not
+  // collected.
+  ProfilingInfo* NotifyCompilerUse(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   void DoneCompiling(ArtMethod* method, Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
+  void DoneCompilerUse(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
@@ -143,6 +155,8 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
+
   // Create a 'ProfileInfo' for 'method'. If 'retry_allocation' is true,
   // will collect and retry if the first allocation is unsuccessful.
   ProfilingInfo* AddProfilingInfo(Thread* self,
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index bd58157..6fe17db 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -16,6 +16,10 @@
 
 #include "profile_saver.h"
 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
 #include "art_method-inl.h"
 #include "base/systrace.h"
 #include "scoped_thread_state_change.h"
@@ -43,14 +47,31 @@
 
 ProfileSaver::ProfileSaver(const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
-                           const std::vector<std::string>& code_paths)
+                           const std::vector<std::string>& code_paths,
+                           const std::string& foreign_dex_profile_path,
+                           const std::string& app_data_dir)
     : jit_code_cache_(jit_code_cache),
+      foreign_dex_profile_path_(foreign_dex_profile_path),
       code_cache_last_update_time_ns_(0),
       shutting_down_(false),
       first_profile_(true),
       wait_lock_("ProfileSaver wait lock"),
       period_condition_("ProfileSaver period condition", wait_lock_) {
   AddTrackedLocations(output_filename, code_paths);
+  app_data_dir_ = "";
+  if (!app_data_dir.empty()) {
+    // The application directory is used to determine which dex files are owned by app.
+    // Since it could be a symlink (e.g. /data/data instead of /data/user/0), and we
+    // don't have control over how the dex files are actually loaded (symlink or canonical path),
+    // store its canonical form to be sure we use the same base when comparing.
+    UniqueCPtr<const char[]> app_data_dir_real_path(realpath(app_data_dir.c_str(), nullptr));
+    if (app_data_dir_real_path != nullptr) {
+      app_data_dir_.assign(app_data_dir_real_path.get());
+    } else {
+      PLOG(WARNING) << "Failed to get the real path for app dir: " << app_data_dir
+          << ". The app dir will not be used to determine which dex files belong to the app";
+    }
+  }
 }
 
 void ProfileSaver::Run() {
@@ -164,7 +185,9 @@
 
 void ProfileSaver::Start(const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
-                         const std::vector<std::string>& code_paths) {
+                         const std::vector<std::string>& code_paths,
+                         const std::string& foreign_dex_profile_path,
+                         const std::string& app_data_dir) {
   DCHECK(Runtime::Current()->UseJit());
   DCHECK(!output_filename.empty());
   DCHECK(jit_code_cache != nullptr);
@@ -183,7 +206,11 @@
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
       << ". Tracking: " << Join(code_paths, ':');
 
-  instance_ = new ProfileSaver(output_filename, jit_code_cache, code_paths);
+  instance_ = new ProfileSaver(output_filename,
+                               jit_code_cache,
+                               code_paths,
+                               foreign_dex_profile_path,
+                               app_data_dir);
 
   // Create a new thread which does the saving.
   CHECK_PTHREAD_CALL(
@@ -250,4 +277,97 @@
   }
 }
 
+void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
+  std::set<std::string> app_code_paths;
+  std::string foreign_dex_profile_path;
+  std::string app_data_dir;
+  {
+    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
+    if (instance_ == nullptr) {
+      // A DCHECK alone would let release builds dereference null when the saver is not running.
+      LOG(WARNING) << "Asked to record foreign dex use without a valid profile saver.";
+      return;
+    }
+    // Make a copy so that we don't hold the lock while doing I/O.
+    for (const auto& it : instance_->tracked_dex_base_locations_) {
+      app_code_paths.insert(it.second.begin(), it.second.end());
+    }
+    foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
+    app_data_dir = instance_->app_data_dir_;
+  }
+  MaybeRecordDexUseInternal(dex_location, app_code_paths, foreign_dex_profile_path, app_data_dir);
+}
+
+// Records a marker file under foreign_dex_profile_path when dex_location is not app-owned.
+void ProfileSaver::MaybeRecordDexUseInternal(const std::string& dex_location,
+                                             const std::set<std::string>& app_code_paths,
+                                             const std::string& foreign_dex_profile_path,
+                                             const std::string& app_data_dir) {
+  if (dex_location.empty()) {
+    LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
+    return;
+  }
+  if (foreign_dex_profile_path.empty()) {
+    LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
+    return;
+  }
+
+  UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
+  if (dex_location_real_path == nullptr) {
+    PLOG(WARNING) << "Could not get realpath for " << dex_location;
+  }
+  std::string dex_location_real_path_str(
+      (dex_location_real_path == nullptr) ? dex_location.c_str() : dex_location_real_path.get());
+
+  // Dex files under the app folder are not recorded; an empty app_data_dir matches nothing.
+  if (!app_data_dir.empty() &&
+      dex_location_real_path_str.compare(0, app_data_dir.length(), app_data_dir) == 0) {
+    return;
+  }
+
+  if (app_code_paths.find(dex_location) != app_code_paths.end()) {
+    // The dex location belongs to the application code paths. Nothing to record.
+    return;
+  }
+  // Do another round of checks with the real paths.
+  // Note that we could cache all the real locations in the saver (since it's an expensive
+  // operation). However we expect that app_code_paths is small (usually 1 element), and
+  // NotifyDexUse is called just a few times in the app lifetime. So we make the compromise
+  // to save some bytes of memory usage.
+  for (const auto& app_code_location : app_code_paths) {
+    UniqueCPtr<const char[]> real_app_code_location(realpath(app_code_location.c_str(), nullptr));
+    if (real_app_code_location == nullptr) {
+      PLOG(WARNING) << "Could not get realpath for " << app_code_location;
+    }
+    std::string real_app_code_location_str((real_app_code_location == nullptr)
+        ? app_code_location.c_str()
+        : real_app_code_location.get());
+    if (real_app_code_location_str == dex_location_real_path_str) {
+      // The dex location belongs to the application code paths. Nothing to record.
+      return;
+    }
+  }
+
+  // For foreign dex files we record a flag on disk. PackageManager will (potentially) take this
+  // into account when deciding how to optimize the loaded dex file.
+  // The expected flag name is the canonical path of the apk where '/' is substituted to '@'.
+  // (it needs to be kept in sync with
+  // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
+  std::replace(dex_location_real_path_str.begin(), dex_location_real_path_str.end(), '/', '@');
+  std::string flag_path = foreign_dex_profile_path + "/" + dex_location_real_path_str;
+  // No need to give any sort of access to flag_path. The system has enough permissions
+  // to test for its existence.
+  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(), O_CREAT | O_EXCL, 0));
+  if (fd != -1) {
+    if (close(fd) != 0) {
+      PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
+    }
+  } else {
+    if (errno != EEXIST) {
+      // Another app could have already created the file.
+      PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
+    }
+  }
+}
+
 }   // namespace art
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 21017c1..e7eab95 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -30,7 +30,9 @@
   // If the saver is already running it adds (output_filename, code_paths) to its tracked locations.
   static void Start(const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
-                    const std::vector<std::string>& code_paths)
+                    const std::vector<std::string>& code_paths,
+                    const std::string& foreign_dex_profile_path,
+                    const std::string& app_data_dir)
       REQUIRES(!Locks::profiler_lock_, !wait_lock_);
 
   // Stops the profile saver thread.
@@ -42,10 +44,14 @@
   // Returns true if the profile saver is started.
   static bool IsStarted() REQUIRES(!Locks::profiler_lock_);
 
+  static void NotifyDexUse(const std::string& dex_location);
+
  private:
   ProfileSaver(const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
-               const std::vector<std::string>& code_paths);
+               const std::vector<std::string>& code_paths,
+               const std::string& foreign_dex_profile_path,
+               const std::string& app_data_dir);
 
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* RunProfileSaverThread(void* arg)
@@ -64,6 +70,12 @@
                            const std::vector<std::string>& code_paths)
       REQUIRES(Locks::profiler_lock_);
 
+  static void MaybeRecordDexUseInternal(
+      const std::string& dex_location,
+      const std::set<std::string>& tracked_locations,
+      const std::string& foreign_dex_profile_path,
+      const std::string& app_data_dir);
+
   // The only instance of the saver.
   static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_);
   // Profile saver thread.
@@ -72,6 +84,8 @@
   jit::JitCodeCache* jit_code_cache_;
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
+  std::string foreign_dex_profile_path_;
+  std::string app_data_dir_;
   uint64_t code_cache_last_update_time_ns_;
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
   bool first_profile_ = true;
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 3820592..07c8051 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -97,8 +97,8 @@
       }
     }
   }
-  // Unsuccessfull - cache is full, making it megamorphic.
-  DCHECK(cache->IsMegamorphic());
+  // Unsuccessful - cache is full, making it megamorphic. We do not DCHECK it though,
+  // as the garbage collector might clear the entries concurrently.
 }
 
 }  // namespace art
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index a8c056c..55d627a 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -56,10 +56,11 @@
   mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
     // Note that we cannot ensure the inline cache is actually monomorphic
     // at this point, as other threads may have updated it.
+    DCHECK(!classes_[0].IsNull());
     return classes_[0].Read();
   }
 
-  bool IsUnitialized() const {
+  bool IsUninitialized() const {
     return classes_[0].IsNull();
   }
 
@@ -134,8 +135,27 @@
     return saved_entry_point_;
   }
 
-  void ClearInlineCaches() {
-    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+  void ClearGcRootsInInlineCaches() {
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      InlineCache* cache = &cache_[i];
+      memset(&cache->classes_[0],
+             0,
+             InlineCache::kIndividualCacheSize * sizeof(GcRoot<mirror::Class>));
+    }
+  }
+
+  void IncrementInlineUse() {
+    DCHECK_NE(current_inline_uses_, std::numeric_limits<uint16_t>::max());
+    current_inline_uses_++;
+  }
+
+  void DecrementInlineUse() {
+    DCHECK_GT(current_inline_uses_, 0);
+    current_inline_uses_--;
+  }
+
+  bool IsInUseByCompiler() const {
+    return IsMethodBeingCompiled() || (current_inline_uses_ > 0);
   }
 
  private:
@@ -143,8 +163,9 @@
       : number_of_inline_caches_(entries.size()),
         method_(method),
         is_method_being_compiled_(false),
+        current_inline_uses_(0),
         saved_entry_point_(nullptr) {
-    ClearInlineCaches();
+    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       cache_[i].dex_pc_ = entries[i];
     }
@@ -161,6 +182,10 @@
   // TODO: Make the JIT code cache lock global.
   bool is_method_being_compiled_;
 
+  // When the compiler inlines the method associated with this ProfilingInfo,
+  // it updates this counter so that the GC does not try to clear the inline caches.
+  uint16_t current_inline_uses_;
+
   // Entry point of the corresponding ArtMethod, while the JIT code cache
   // is poking for the liveness of compiled code.
   const void* saved_entry_point_;
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 11156c6..421641c 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -590,7 +590,19 @@
 }
 
 bool MemMap::Sync() {
-  return msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+  bool result;
+  if (redzone_size_ != 0) {
+    // To avoid valgrind errors, temporarily lift the lower-end noaccess protection before passing
+    // it to msync() as it only accepts page-aligned base address, and exclude the higher-end
+    // noaccess protection from the msync range. b/27552451.
+    uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
+    MEMORY_TOOL_MAKE_DEFINED(base_begin, begin_ - base_begin);
+    result = msync(BaseBegin(), End() - base_begin, MS_SYNC) == 0;
+    MEMORY_TOOL_MAKE_NOACCESS(base_begin, begin_ - base_begin);
+  } else {
+    result = msync(BaseBegin(), BaseSize(), MS_SYNC) == 0;
+  }
+  return result;
 }
 
 bool MemMap::Protect(int prot) {
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 4d94130..701c600 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -183,7 +183,7 @@
         break;
       }
       case LockWord::kFatLocked: {
-        // Already inflated, return the has stored in the monitor.
+        // Already inflated, return the hash stored in the monitor.
         Monitor* monitor = lw.FatLockMonitor();
         DCHECK(monitor != nullptr);
         return monitor->GetHashCode();
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 1ce5841..a262c7a 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -497,6 +497,24 @@
     self->SetWaitMonitor(nullptr);
   }
 
+  // Allocate the interrupted exception without holding the monitor lock since it may cause a GC.
+  // If the GC requires acquiring the monitor for enqueuing cleared references, this would
+  // cause a deadlock if the monitor is held.
+  if (was_interrupted && interruptShouldThrow) {
+    /*
+     * We were interrupted while waiting, or somebody interrupted an
+     * un-interruptible thread earlier and we're bailing out immediately.
+     *
+     * The doc sayeth: "The interrupted status of the current thread is
+     * cleared when this exception is thrown."
+     */
+    {
+      MutexLock mu(self, *self->GetWaitMutex());
+      self->SetInterruptedLocked(false);
+    }
+    self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
+  }
+
   // Re-acquire the monitor and lock.
   Lock(self);
   monitor_lock_.Lock(self);
@@ -516,21 +534,6 @@
   RemoveFromWaitSet(self);
 
   monitor_lock_.Unlock(self);
-
-  if (was_interrupted && interruptShouldThrow) {
-    /*
-     * We were interrupted while waiting, or somebody interrupted an
-     * un-interruptible thread earlier and we're bailing out immediately.
-     *
-     * The doc sayeth: "The interrupted status of the current thread is
-     * cleared when this exception is thrown."
-     */
-    {
-      MutexLock mu(self, *self->GetWaitMutex());
-      self->SetInterruptedLocked(false);
-    }
-    self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
-  }
 }
 
 void Monitor::Notify(Thread* self) {
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 9e78cda..ce38e4f 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -42,11 +42,12 @@
     if (capacity_ == 0U) {
       // Initialization.
       capacity_ = kInitialChunkStorage;
-      uintptr_t* new_backing = new uintptr_t[capacity_];
+      uintptr_t* new_backing = new uintptr_t[capacity_]();
+      DCHECK(monitor_chunks_.LoadRelaxed() == nullptr);
       monitor_chunks_.StoreRelaxed(new_backing);
     } else {
       size_t new_capacity = 2 * capacity_;
-      uintptr_t* new_backing = new uintptr_t[new_capacity];
+      uintptr_t* new_backing = new uintptr_t[new_capacity]();
       uintptr_t* old_backing = monitor_chunks_.LoadRelaxed();
       memcpy(new_backing, old_backing, sizeof(uintptr_t) * capacity_);
       monitor_chunks_.StoreRelaxed(new_backing);
@@ -88,6 +89,25 @@
   first_free_ = last;
 }
 
+void MonitorPool::FreeInternal() {
+  // This is on shutdown with NO_THREAD_SAFETY_ANALYSIS, can't/don't need to lock.
+  uintptr_t* backing = monitor_chunks_.LoadRelaxed();
+  DCHECK(backing != nullptr);
+  DCHECK_GT(capacity_, 0U);
+  DCHECK_GT(num_chunks_, 0U);
+
+  for (size_t i = 0; i < capacity_; ++i) {
+    if (i < num_chunks_) {
+      DCHECK_NE(backing[i], 0U);
+      allocator_.deallocate(reinterpret_cast<uint8_t*>(backing[i]), kChunkSize);
+    } else {
+      DCHECK_EQ(backing[i], 0U);
+    }
+  }
+
+  delete[] backing;
+}
+
 Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj,
                                           int32_t hash_code)
     SHARED_REQUIRES(Locks::mutator_lock_) {
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index de553fc..875b3fe 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -104,6 +104,12 @@
 #endif
   }
 
+  ~MonitorPool() {
+#ifdef __LP64__
+    FreeInternal();
+#endif
+  }
+
  private:
 #ifdef __LP64__
   // When we create a monitor pool, threads have not been initialized, yet, so ignore thread-safety
@@ -112,6 +118,10 @@
 
   void AllocateChunk() REQUIRES(Locks::allocated_monitor_ids_lock_);
 
+  // Release all chunks and metadata. This is done on shutdown, where threads have been destroyed,
+  // so ignore thread-safety analysis.
+  void FreeInternal() NO_THREAD_SAFETY_ANALYSIS;
+
   Monitor* CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 6643ac2..f1e0fa7 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -347,15 +347,14 @@
 
 static jint GetDexOptNeeded(JNIEnv* env,
                             const char* filename,
-                            const char* pkgname,
                             const char* instruction_set,
-                            const jboolean defer) {
+                            const int target_compilation_type_mask) {
   if ((filename == nullptr) || !OS::FileExists(filename)) {
     LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
     const char* message = (filename == nullptr) ? "<empty file name>" : filename;
     env->ThrowNew(fnfe.get(), message);
-    return OatFileAssistant::kNoDexOptNeeded;
+    return -1;
   }
 
   const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
@@ -363,73 +362,52 @@
     ScopedLocalRef<jclass> iae(env, env->FindClass("java/lang/IllegalArgumentException"));
     std::string message(StringPrintf("Instruction set %s is invalid.", instruction_set));
     env->ThrowNew(iae.get(), message.c_str());
-    return 0;
+    return -1;
   }
 
   // TODO: Verify the dex location is well formed, and throw an IOException if
   // not?
-
-  OatFileAssistant oat_file_assistant(filename, target_instruction_set, false, pkgname);
+  OatFileAssistant oat_file_assistant(filename, target_compilation_type_mask,
+      target_instruction_set, false);
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
     return OatFileAssistant::kNoDexOptNeeded;
   }
 
-  // TODO: Checking the profile should probably be done in the GetStatus()
-  // function. We have it here because GetStatus() should not be copying
-  // profile files. But who should be copying profile files?
-  if (oat_file_assistant.OdexFileIsOutOfDate()) {
-    // Needs recompile if profile has changed significantly.
-    if (Runtime::Current()->GetProfilerOptions().IsEnabled()) {
-      if (oat_file_assistant.IsProfileChangeSignificant()) {
-        if (!defer) {
-          oat_file_assistant.CopyProfileFile();
-        }
-        return OatFileAssistant::kDex2OatNeeded;
-      } else if (oat_file_assistant.ProfileExists()
-          && !oat_file_assistant.OldProfileExists()) {
-        if (!defer) {
-          oat_file_assistant.CopyProfileFile();
-        }
-      }
-    }
-  }
-
   return oat_file_assistant.GetDexOptNeeded();
 }
 
 static jint DexFile_getDexOptNeeded(JNIEnv* env,
                                     jclass,
                                     jstring javaFilename,
-                                    jstring javaPkgname,
                                     jstring javaInstructionSet,
-                                    jboolean defer) {
+                                    jint javaTargetCompilationTypeMask) {
   ScopedUtfChars filename(env, javaFilename);
   if (env->ExceptionCheck()) {
-    return 0;
+    return -1;
   }
 
-  NullableScopedUtfChars pkgname(env, javaPkgname);
-
   ScopedUtfChars instruction_set(env, javaInstructionSet);
   if (env->ExceptionCheck()) {
-    return 0;
+    return -1;
   }
 
   return GetDexOptNeeded(env,
                          filename.c_str(),
-                         pkgname.c_str(),
                          instruction_set.c_str(),
-                         defer);
+                         javaTargetCompilationTypeMask);
 }
 
-// public API, null pkgname
+// public API
 static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
   const char* instruction_set = GetInstructionSetString(kRuntimeISA);
   ScopedUtfChars filename(env, javaFilename);
-  jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */,
-                                instruction_set, false /* defer */);
+  jint status = GetDexOptNeeded(
+      env,
+      filename.c_str(),
+      instruction_set,
+      OatFileAssistant::kFullCompilation | OatFileAssistant::kProfileGuideCompilation);
   return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -445,7 +423,7 @@
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(DexFile, getDexOptNeeded,
-                "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
+                "(Ljava/lang/String;Ljava/lang/String;I)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
                 "(Ljava/lang/String;"
                 "Ljava/lang/String;"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index da4a891..f6b2f21 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -566,8 +566,9 @@
 static void VMRuntime_registerAppInfo(JNIEnv* env,
                                       jclass clazz ATTRIBUTE_UNUSED,
                                       jstring profile_file,
-                                      jstring app_dir ATTRIBUTE_UNUSED,  // TODO: remove argument
-                                      jobjectArray code_paths) {
+                                      jstring app_dir,
+                                      jobjectArray code_paths,
+                                      jstring foreign_dex_profile_path) {
   std::vector<std::string> code_paths_vec;
   int code_paths_length = env->GetArrayLength(code_paths);
   for (int i = 0; i < code_paths_length; i++) {
@@ -581,7 +582,22 @@
   std::string profile_file_str(raw_profile_file);
   env->ReleaseStringUTFChars(profile_file, raw_profile_file);
 
-  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file_str);
+  std::string foreign_dex_profile_path_str = "";
+  if (foreign_dex_profile_path != nullptr) {
+    const char* raw_foreign_dex_profile_path =
+        env->GetStringUTFChars(foreign_dex_profile_path, nullptr);
+    foreign_dex_profile_path_str.assign(raw_foreign_dex_profile_path);
+    env->ReleaseStringUTFChars(foreign_dex_profile_path, raw_foreign_dex_profile_path);
+  }
+
+  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
+  std::string app_dir_str(raw_app_dir);
+  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
+
+  Runtime::Current()->RegisterAppInfo(code_paths_vec,
+                                      profile_file_str,
+                                      foreign_dex_profile_path_str,
+                                      app_dir_str);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
@@ -638,7 +654,7 @@
   NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "!()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
   NATIVE_METHOD(VMRuntime, registerAppInfo,
-                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;)V"),
+                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;Ljava/lang/String;)V"),
   NATIVE_METHOD(VMRuntime, isBootClassPathOnDisk, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(VMRuntime, getCurrentInstructionSet, "()Ljava/lang/String;"),
 };
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 4948558..ed99cba 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -29,6 +29,8 @@
 constexpr uint8_t OatHeader::kOatVersion[4];
 constexpr const char OatHeader::kTrueValue[];
 constexpr const char OatHeader::kFalseValue[];
+constexpr const char OatHeader::kExtractOnlyValue[];
+constexpr const char OatHeader::kProfileGuideCompiledValue[];
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
@@ -466,13 +468,29 @@
   return IsKeyEnabled(OatHeader::kDebuggableKey);
 }
 
+bool OatHeader::IsNativeDebuggable() const {
+  return IsKeyEnabled(OatHeader::kNativeDebuggableKey);
+}
+
 bool OatHeader::IsExtractOnly() const {
-  return IsKeyEnabled(OatHeader::kExtractOnlyKey);
+  return KeyHasValue(kCompilationType,
+                     kExtractOnlyValue,
+                     sizeof(kExtractOnlyValue));
+}
+
+bool OatHeader::IsProfileGuideCompiled() const {
+  return KeyHasValue(kCompilationType,
+                     kProfileGuideCompiledValue,
+                     sizeof(kProfileGuideCompiledValue));
+}
+
+bool OatHeader::KeyHasValue(const char* key, const char* value, size_t value_size) const {
+  const char* key_value = GetStoreValueByKey(key);
+  return (key_value != nullptr && strncmp(key_value, value, value_size) == 0);
 }
 
 bool OatHeader::IsKeyEnabled(const char* key) const {
-  const char* key_value = GetStoreValueByKey(key);
-  return (key_value != nullptr && strncmp(key_value, kTrueValue, sizeof(kTrueValue)) == 0);
+  return KeyHasValue(key, kTrueValue, sizeof(kTrueValue));
 }
 
 void OatHeader::Flatten(const SafeMap<std::string, std::string>* key_value_store) {
diff --git a/runtime/oat.h b/runtime/oat.h
index fde386f..1d6c076 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -38,12 +38,16 @@
   static constexpr const char* kDex2OatHostKey = "dex2oat-host";
   static constexpr const char* kPicKey = "pic";
   static constexpr const char* kDebuggableKey = "debuggable";
-  static constexpr const char* kExtractOnlyKey = "extract-only";
+  static constexpr const char* kNativeDebuggableKey = "native-debuggable";
+  static constexpr const char* kCompilationType = "compilation-type";
   static constexpr const char* kClassPathKey = "classpath";
   static constexpr const char* kBootClassPath = "bootclasspath";
 
   static constexpr const char kTrueValue[] = "true";
   static constexpr const char kFalseValue[] = "false";
+  static constexpr const char kExtractOnlyValue[] = "extract-only";
+  static constexpr const char kProfileGuideCompiledValue[] = "profile-guide";
+
 
   static OatHeader* Create(InstructionSet instruction_set,
                            const InstructionSetFeatures* instruction_set_features,
@@ -107,9 +111,13 @@
   size_t GetHeaderSize() const;
   bool IsPic() const;
   bool IsDebuggable() const;
+  bool IsNativeDebuggable() const;
   bool IsExtractOnly() const;
+  bool IsProfileGuideCompiled() const;
 
  private:
+  bool KeyHasValue(const char* key, const char* value, size_t value_size) const;
+
   OatHeader(InstructionSet instruction_set,
             const InstructionSetFeatures* instruction_set_features,
             uint32_t dex_file_count,
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index c389547..7155c79 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -169,7 +169,10 @@
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    // Host can fail this check. Do not dump there to avoid polluting the output.
+    if (kIsTargetBuild) {
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    }
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
         "oatdata=%p != expected=%p. See process maps in the log.",
         begin_, requested_base);
@@ -1232,6 +1235,10 @@
   return GetOatHeader().IsExtractOnly();
 }
 
+bool OatFile::IsProfileGuideCompiled() const {
+  return GetOatHeader().IsProfileGuideCompiled();
+}
+
 static constexpr char kDexClassPathEncodingSeparator = '*';
 
 std::string OatFile::EncodeDexFileDependencies(const std::vector<const DexFile*>& dex_files) {
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index fb91a8c..1084253 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -93,6 +93,8 @@
 
   bool IsExtractOnly() const;
 
+  bool IsProfileGuideCompiled() const;
+
   const std::string& GetLocation() const {
     return location_;
   }
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 262c932..90712c6 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -36,7 +36,6 @@
 #include "image.h"
 #include "oat.h"
 #include "os.h"
-#include "profiler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedFd.h"
@@ -45,28 +44,19 @@
 namespace art {
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
+                                   const int target_compilation_type_mask,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable, nullptr) { }
+    : OatFileAssistant(dex_location, nullptr, target_compilation_type_mask, isa, load_executable)
+{ }
 
 OatFileAssistant::OatFileAssistant(const char* dex_location,
                                    const char* oat_location,
+                                   const int target_compilation_type_mask,
                                    const InstructionSet isa,
                                    bool load_executable)
-    : OatFileAssistant(dex_location, oat_location, isa, load_executable, nullptr) { }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const InstructionSet isa,
-                                   bool load_executable,
-                                   const char* package_name)
-    : OatFileAssistant(dex_location, nullptr, isa, load_executable, package_name) { }
-
-OatFileAssistant::OatFileAssistant(const char* dex_location,
-                                   const char* oat_location,
-                                   const InstructionSet isa,
-                                   bool load_executable,
-                                   const char* package_name)
-    : isa_(isa), package_name_(package_name), load_executable_(load_executable) {
+    : target_compilation_type_mask_(target_compilation_type_mask), isa_(isa),
+      load_executable_(load_executable) {
   CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location";
   dex_location_.assign(dex_location);
 
@@ -83,18 +73,6 @@
     cached_oat_file_name_attempted_ = true;
     cached_oat_file_name_found_ = true;
   }
-
-  // If there is no package name given, we will not be able to find any
-  // profiles associated with this dex location. Preemptively mark that to
-  // be the case, rather than trying to find and load the profiles later.
-  // Similarly, if profiling is disabled.
-  if (package_name == nullptr
-      || !Runtime::Current()->GetProfilerOptions().IsEnabled()) {
-    profile_load_attempted_ = true;
-    profile_load_succeeded_ = false;
-    old_profile_load_attempted_ = true;
-    old_profile_load_succeeded_ = false;
-  }
 }
 
 OatFileAssistant::~OatFileAssistant() {
@@ -138,10 +116,23 @@
   return true;
 }
 
-OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() {
-  // TODO: If the profiling code is ever restored, it's worth considering
-  // whether we should check to see if the profile is out of date here.
+// Returns the compilation mode of the given oat file.
+static OatFileAssistant::CompilationType GetCompilationType(const OatFile& oat_file) {
+    if (oat_file.IsExtractOnly()) {
+      return OatFileAssistant::kExtractOnly;
+    }
+    if (oat_file.IsProfileGuideCompiled()) {
+      return OatFileAssistant::kProfileGuideCompilation;
+    }
+    // Assume that if the oat file is not extract-only or profile-guide compiled
+    // then it must be fully compiled.
+    // NB: this does not necessarily mean that the oat file is actually fully compiled. It
+    // might have been compiled in a different way (e.g. interpret-only) which does
+    // not record a type in the header.
+    return OatFileAssistant::kFullCompilation;
+}
 
+OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() {
   if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
     return kNoDexOptNeeded;
   }
@@ -419,6 +410,11 @@
 }
 
 bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
+  // Verify the file satisfies the desired compilation type.
+  if ((target_compilation_type_mask_ & GetCompilationType(file)) == 0) {
+    return true;
+  }
+
   // Verify the dex checksum.
   // Note: GetOatDexFile will return null if the dex checksum doesn't match
   // what we provide, which verifies the primary dex checksum for us.
@@ -541,104 +537,6 @@
   return true;
 }
 
-bool OatFileAssistant::ProfileExists() {
-  return GetProfile() != nullptr;
-}
-
-bool OatFileAssistant::OldProfileExists() {
-  return GetOldProfile() != nullptr;
-}
-
-// TODO: The IsProfileChangeSignificant implementation was copied from likely
-// bit-rotted code.
-bool OatFileAssistant::IsProfileChangeSignificant() {
-  ProfileFile* profile = GetProfile();
-  if (profile == nullptr) {
-    return false;
-  }
-
-  ProfileFile* old_profile = GetOldProfile();
-  if (old_profile == nullptr) {
-    return false;
-  }
-
-  // TODO: The following code to compare two profile files should live with
-  // the rest of the profiler code, not the oat file assistant code.
-
-  // A change in profile is considered significant if X% (change_thr property)
-  // of the top K% (compile_thr property) samples has changed.
-  const ProfilerOptions& options = Runtime::Current()->GetProfilerOptions();
-  const double top_k_threshold = options.GetTopKThreshold();
-  const double change_threshold = options.GetTopKChangeThreshold();
-  std::set<std::string> top_k, old_top_k;
-  profile->GetTopKSamples(top_k, top_k_threshold);
-  old_profile->GetTopKSamples(old_top_k, top_k_threshold);
-  std::set<std::string> diff;
-  std::set_difference(top_k.begin(), top_k.end(), old_top_k.begin(),
-      old_top_k.end(), std::inserter(diff, diff.end()));
-
-  // TODO: consider using the usedPercentage instead of the plain diff count.
-  double change_percent = 100.0 * static_cast<double>(diff.size())
-                                / static_cast<double>(top_k.size());
-  std::set<std::string>::iterator end = diff.end();
-  for (std::set<std::string>::iterator it = diff.begin(); it != end; it++) {
-    VLOG(oat) << "Profile new in topK: " << *it;
-  }
-
-  if (change_percent > change_threshold) {
-      VLOG(oat) << "Oat File Assistant: Profile for " << dex_location_
-        << "has changed significantly: (top "
-        << top_k_threshold << "% samples changed in proportion of "
-        << change_percent << "%)";
-      return true;
-  }
-  return false;
-}
-
-// TODO: The CopyProfileFile implementation was copied from likely bit-rotted
-// code.
-void OatFileAssistant::CopyProfileFile() {
-  if (!ProfileExists()) {
-    return;
-  }
-
-  std::string profile_name = ProfileFileName();
-  std::string old_profile_name = OldProfileFileName();
-
-  ScopedFd src(open(old_profile_name.c_str(), O_RDONLY));
-  if (src.get() == -1) {
-    PLOG(WARNING) << "Failed to open profile file " << old_profile_name
-      << ". My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-  struct stat stat_src;
-  if (fstat(src.get(), &stat_src) == -1) {
-    PLOG(WARNING) << "Failed to get stats for profile file  " << old_profile_name
-      << ". My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-  // Create the copy with rw------- (only accessible by system)
-  ScopedFd dst(open(profile_name.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0600));
-  if (dst.get()  == -1) {
-    PLOG(WARNING) << "Failed to create/write prev profile file " << profile_name
-      << ".  My uid:gid is " << getuid() << ":" << getgid();
-    return;
-  }
-
-#ifdef __linux__
-  if (sendfile(dst.get(), src.get(), nullptr, stat_src.st_size) == -1) {
-#else
-  off_t len;
-  if (sendfile(dst.get(), src.get(), 0, &len, nullptr, 0) == -1) {
-#endif
-    PLOG(WARNING) << "Failed to copy profile file " << old_profile_name
-      << " to " << profile_name << ". My uid:gid is " << getuid()
-      << ":" << getgid();
-  }
-}
-
 bool OatFileAssistant::RelocateOatFile(const std::string* input_file,
                                        std::string* error_msg) {
   CHECK(error_msg != nullptr);
@@ -694,6 +592,15 @@
 bool OatFileAssistant::GenerateOatFile(std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
+  // TODO: Currently we only know how to make a fully-compiled oat file.
+  // Perhaps we should support generating other kinds of oat files?
+  if ((target_compilation_type_mask_ & kFullCompilation) == 0) {
+    *error_msg = "Generation of oat file for dex location " + dex_location_
+      + " not attempted because full compilation was not specified"
+      + " as an acceptable target compilation type.";
+    return false;
+  }
+
   Runtime* runtime = Runtime::Current();
   if (!runtime->IsDex2OatEnabled()) {
     *error_msg = "Generation of oat file for dex location " + dex_location_
@@ -861,21 +768,6 @@
   return result;
 }
 
-std::string OatFileAssistant::ProfileFileName() {
-  if (package_name_ != nullptr) {
-    return DalvikCacheDirectory() + std::string("profiles/") + package_name_;
-  }
-  return "";
-}
-
-std::string OatFileAssistant::OldProfileFileName() {
-  std::string profile_name = ProfileFileName();
-  if (profile_name.empty()) {
-    return "";
-  }
-  return profile_name + "@old";
-}
-
 std::string OatFileAssistant::ImageLocation() {
   Runtime* runtime = Runtime::Current();
   const std::vector<gc::space::ImageSpace*>& image_spaces =
@@ -1007,34 +899,6 @@
   return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
 }
 
-ProfileFile* OatFileAssistant::GetProfile() {
-  if (!profile_load_attempted_) {
-    CHECK(package_name_ != nullptr)
-      << "pakage_name_ is nullptr: "
-      << "profile_load_attempted_ should have been true";
-    profile_load_attempted_ = true;
-    std::string profile_name = ProfileFileName();
-    if (!profile_name.empty()) {
-      profile_load_succeeded_ = cached_profile_.LoadFile(profile_name);
-    }
-  }
-  return profile_load_succeeded_ ? &cached_profile_ : nullptr;
-}
-
-ProfileFile* OatFileAssistant::GetOldProfile() {
-  if (!old_profile_load_attempted_) {
-    CHECK(package_name_ != nullptr)
-      << "pakage_name_ is nullptr: "
-      << "old_profile_load_attempted_ should have been true";
-    old_profile_load_attempted_ = true;
-    std::string old_profile_name = OldProfileFileName();
-    if (!old_profile_name.empty()) {
-      old_profile_load_succeeded_ = cached_old_profile_.LoadFile(old_profile_name);
-    }
-  }
-  return old_profile_load_succeeded_ ? &cached_old_profile_ : nullptr;
-}
-
 gc::space::ImageSpace* OatFileAssistant::OpenImageSpace(const OatFile* oat_file) {
   DCHECK(oat_file != nullptr);
   std::string art_file = ArtFileName(oat_file);
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 7b45bca..893aea2 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -44,9 +44,6 @@
 // The oat file assistant is intended to be used with dex locations not on the
 // boot class path. See the IsInBootClassPath method for a way to check if the
 // dex location is in the boot class path.
-//
-// TODO: All the profiling related code is old and untested. It should either
-// be restored and tested, or removed.
 class OatFileAssistant {
  public:
   enum DexOptNeeded {
@@ -73,8 +70,8 @@
 
   enum OatStatus {
     // kOatOutOfDate - An oat file is said to be out of date if the file does
-    // not exist, or is out of date with respect to the dex file or boot
-    // image.
+    // not exist, is out of date with respect to the dex file or boot image,
+    // or does not meet the target compilation type.
     kOatOutOfDate,
 
     // kOatNeedsRelocation - An oat file is said to need relocation if the
@@ -88,6 +85,20 @@
     kOatUpToDate,
   };
 
+  // Represents the different compilation types of oat files that OatFileAssistant
+  // and external GetDexOptNeeded callers care about.
+  // Note: each value is a distinct bit so that values can be OR-ed into a mask.
+  enum CompilationType {
+    // Matches Java: dalvik.system.DexFile.COMPILATION_TYPE_FULL = 1
+    kFullCompilation = 1,
+
+    // Matches Java: dalvik.system.DexFile.COMPILATION_TYPE_PROFILE_GUIDE = 2
+    kProfileGuideCompilation = 2,
+
+    // Matches Java: dalvik.system.DexFile.COMPILATION_TYPE_EXTRACT_ONLY = 4
+    kExtractOnly = 4,
+  };
+
   // Constructs an OatFileAssistant object to assist the oat file
   // corresponding to the given dex location with the target instruction set.
   //
@@ -99,31 +110,28 @@
   // Note: Currently the dex_location must have an extension.
   // TODO: Relax this restriction?
   //
+  // The target compilation type specifies a set of CompilationTypes that
+  // should be considered up to date. An oat file compiled in a way not
+  // included in the set is considered out of date. For example, to consider
+  // otherwise up-to-date fully compiled and profile-guide compiled oat
+  // files as up to date, but to consider extract-only files as out of date,
+  // specify: (kFullCompilation | kProfileGuideCompilation).
+  //
   // The isa should be either the 32 bit or 64 bit variant for the current
   // device. For example, on an arm device, use arm or arm64. An oat file can
   // be loaded executable only if the ISA matches the current runtime.
-  OatFileAssistant(const char* dex_location, const InstructionSet isa,
+  OatFileAssistant(const char* dex_location,
+                   int target_compilation_type_mask,
+                   const InstructionSet isa,
                    bool load_executable);
 
   // Constructs an OatFileAssistant, providing an explicit target oat_location
   // to use instead of the standard oat location.
-  OatFileAssistant(const char* dex_location, const char* oat_location,
-                   const InstructionSet isa, bool load_executable);
-
-  // Constructs an OatFileAssistant, providing an additional package_name used
-  // solely for the purpose of locating profile files.
-  //
-  // TODO: Why is the name of the profile file based on the package name and
-  // not the dex location? If there is no technical reason the dex_location
-  // can't be used, we should prefer that instead.
-  OatFileAssistant(const char* dex_location, const InstructionSet isa,
-                   bool load_executable, const char* package_name);
-
-  // Constructs an OatFileAssistant with user specified oat location and a
-  // package name.
-  OatFileAssistant(const char* dex_location, const char* oat_location,
-                   const InstructionSet isa, bool load_executable,
-                   const char* package_name);
+  OatFileAssistant(const char* dex_location,
+                   const char* oat_location,
+                   int target_compilation_type_mask,
+                   const InstructionSet isa,
+                   bool load_executable);
 
   ~OatFileAssistant();
 
@@ -233,28 +241,6 @@
   bool GivenOatFileNeedsRelocation(const OatFile& file);
   bool GivenOatFileIsUpToDate(const OatFile& file);
 
-  // Returns true if there is an accessible profile associated with the dex
-  // location.
-  // This returns false if profiling is disabled.
-  bool ProfileExists();
-
-  // The old profile is a file containing a previous snapshot of profiling
-  // information associated with the dex file code. This is used to track how
-  // the profiling information has changed over time.
-  //
-  // Returns true if there is an accessible old profile associated with the
-  // dex location.
-  // This returns false if profiling is disabled.
-  bool OldProfileExists();
-
-  // Returns true if there has been a significant change between the old
-  // profile and the current profile.
-  // This returns false if profiling is disabled.
-  bool IsProfileChangeSignificant();
-
-  // Copy the current profile to the old profile location.
-  void CopyProfileFile();
-
   // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
   // oat file.
@@ -309,16 +295,6 @@
   // Returns an empty string if we can't get the dalvik cache directory path.
   std::string DalvikCacheDirectory();
 
-  // Constructs the filename for the profile file.
-  // Returns an empty string if we do not have the necessary information to
-  // construct the filename.
-  std::string ProfileFileName();
-
-  // Constructs the filename for the old profile file.
-  // Returns an empty string if we do not have the necessary information to
-  // construct the filename.
-  std::string OldProfileFileName();
-
   // Returns the current image location.
   // Returns an empty string if the image location could not be retrieved.
   //
@@ -364,35 +340,18 @@
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
-  // Returns the loaded profile.
-  // Loads the profile if needed. Returns null if the profile failed
-  // to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  ProfileFile* GetProfile();
-
-  // Returns the loaded old profile.
-  // Loads the old profile if needed. Returns null if the old profile
-  // failed to load.
-  // The caller shouldn't clean up or free the returned pointer.
-  ProfileFile* GetOldProfile();
-
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
   ScopedFlock flock_;
 
   std::string dex_location_;
+  const int target_compilation_type_mask_;
 
   // In a properly constructed OatFileAssistant object, isa_ should be either
   // the 32 or 64 bit variant for the current device.
   const InstructionSet isa_ = kNone;
 
-  // The package name, used solely to find the profile file.
-  // This may be null in a properly constructed object. In this case,
-  // profile_load_attempted_ and old_profile_load_attempted_ will be true, and
-  // profile_load_succeeded_ and old_profile_load_succeeded_ will be false.
-  const char* package_name_ = nullptr;
-
   // Whether we will attempt to load oat files executable.
   bool load_executable_ = false;
 
@@ -451,18 +410,6 @@
   bool image_info_load_succeeded_ = false;
   ImageInfo cached_image_info_;
 
-  // Cached value of the profile file.
-  // Use the GetProfile method rather than accessing these directly.
-  bool profile_load_attempted_ = false;
-  bool profile_load_succeeded_ = false;
-  ProfileFile cached_profile_;
-
-  // Cached value of the profile file.
-  // Use the GetOldProfile method rather than accessing these directly.
-  bool old_profile_load_attempted_ = false;
-  bool old_profile_load_succeeded_ = false;
-  ProfileFile cached_old_profile_;
-
   // For debugging only.
   // If this flag is set, the oat or odex file has been released to the user
   // of the OatFileAssistant object and the OatFileAssistant object is in a
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 83d4457..4541468 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -260,7 +260,7 @@
   }
 
   void GenerateExtractOnlyOdexForTest(const std::string& dex_location,
-                                          const std::string& odex_location) {
+                                      const std::string& odex_location) {
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
     args.push_back("--oat-file=" + odex_location);
@@ -277,7 +277,26 @@
     EXPECT_EQ(odex_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0u);
     EXPECT_EQ(odex_file->GetOatHeader().GetImageFileLocationOatDataBegin(), 0u);
     EXPECT_EQ(odex_file->GetOatHeader().GetImagePatchDelta(), 0);
-}
+  }
+
+  // Generates a profile-guide compiled odex file for dex_location at odex_location.
+  void GenerateProfileGuideOdexForTest(const std::string& dex_location,
+                                       const std::string& odex_location) {
+    std::vector<std::string> args;
+    args.push_back("--dex-file=" + dex_location);
+    args.push_back("--oat-file=" + odex_location);
+    ScratchFile profile_file;
+    args.push_back("--profile-file=" + profile_file.GetFilename());
+    std::string error_msg;
+    ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg;
+
+    // Verify the odex file was generated as expected.
+    std::unique_ptr<OatFile> odex_file(OatFile::Open(
+        odex_location.c_str(), odex_location.c_str(), nullptr, nullptr,
+        false, dex_location.c_str(), &error_msg));
+    ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
+    EXPECT_TRUE(odex_file->IsProfileGuideCompiled());
+  }
 
  private:
   // Reserve memory around where the image will be loaded so other memory
@@ -344,7 +363,8 @@
 // Generate an oat file for the purposes of test, as opposed to testing
 // generation of oat files.
 static void GenerateOatForTest(const char* dex_location) {
-  OatFileAssistant oat_file_assistant(dex_location, kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location,
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   std::string error_msg;
   ASSERT_TRUE(oat_file_assistant.GenerateOatFile(&error_msg)) << error_msg;
@@ -356,7 +376,8 @@
   std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -379,7 +400,8 @@
 TEST_F(OatFileAssistantTest, NoDexNoOat) {
   std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.HasOriginalDexFiles());
@@ -400,7 +422,8 @@
   Copy(GetDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str());
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
@@ -422,7 +445,8 @@
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str());
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 
@@ -448,7 +472,8 @@
   // is out of date.
   Copy(GetMultiDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
@@ -475,6 +500,7 @@
   // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
                                       oat_location.c_str(),
+                                      OatFileAssistant::kFullCompilation,
                                       kRuntimeISA, true);
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -495,7 +521,8 @@
   GenerateOatForTest(dex_location.c_str());
   Copy(GetDexSrc2(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
@@ -508,32 +535,6 @@
   EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
 }
 
-// Case: We have a DEX file and an extract-only ODEX file out of date relative
-//       to the DEX file.
-// Expect: The status is kDex2OatNeeded.
-TEST_F(OatFileAssistantTest, ExtractOnlyOdexOutOfDate) {
-  std::string dex_location = GetScratchDir() + "/ExtractOnlyOdexOutOfDate.jar";
-  std::string odex_location = GetOdexDir() + "/ExtractOnlyOdexOutOfDate.odex";
-
-  // We create a dex, generate an oat for it, then overwrite the dex with a
-  // different dex to make the oat out of date.
-  Copy(GetDexSrc1(), dex_location);
-  GenerateExtractOnlyOdexForTest(dex_location.c_str(), odex_location.c_str());
-  Copy(GetDexSrc2(), dex_location);
-
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
-
-  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_TRUE(oat_file_assistant.OdexFileExists());
-  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileExists());
-  EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
-  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles());
-}
-
 // Case: We have a DEX file and an ODEX file, but no OAT file.
 // Expect: The status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, DexOdexNoOat) {
@@ -545,7 +546,8 @@
   GenerateOdexForTest(dex_location, odex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -578,7 +580,8 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -633,7 +636,8 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -681,7 +685,8 @@
   Copy(GetStrippedDexSrc1(), dex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -724,7 +729,7 @@
   GenerateOdexForTest(dex_location, oat_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, true);
+      oat_location.c_str(), OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -782,7 +787,7 @@
 
   // Verify things don't go bad.
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
-      oat_location.c_str(), kRuntimeISA, true);
+      oat_location.c_str(), OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -816,7 +821,8 @@
   GeneratePicOdexForTest(dex_location, odex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -841,7 +847,9 @@
   GenerateExtractOnlyOdexForTest(dex_location, odex_location);
 
   // Verify the status.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation | OatFileAssistant::kExtractOnly,
+      kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -864,7 +872,8 @@
   GenerateOatForTest(dex_location.c_str());
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -883,7 +892,8 @@
   GenerateOatForTest(dex_location.c_str());
 
   // Load the oat using an oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -903,7 +913,8 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+      dex_location.c_str(), oat_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
   std::string error_msg;
   ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
@@ -917,7 +928,8 @@
   EXPECT_TRUE(OS::FileExists(oat_location.c_str()));
 
   // Verify it didn't create an oat in the default location.
-  OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant ofm(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
   EXPECT_FALSE(ofm.OatFileExists());
 }
 
@@ -933,7 +945,8 @@
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+      dex_location.c_str(), oat_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
   std::string error_msg;
   ASSERT_FALSE(oat_file_assistant.MakeUpToDate(&error_msg));
 
@@ -948,7 +961,8 @@
   std::string oat_location = GetScratchDir() + "/GenNoDex.oat";
 
   OatFileAssistant oat_file_assistant(
-      dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true);
+      dex_location.c_str(), oat_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
   std::string error_msg;
   ASSERT_FALSE(oat_file_assistant.GenerateOatFile(&error_msg));
 }
@@ -996,7 +1010,8 @@
   Copy(GetDexSrc1(), abs_dex_location);
 
   std::string dex_location = MakePathRelative(abs_dex_location);
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
@@ -1013,7 +1028,8 @@
 TEST_F(OatFileAssistantTest, ShortDexLocation) {
   std::string dex_location = "/xx";
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
@@ -1037,7 +1053,8 @@
   std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
   Copy(GetDexSrc1(), dex_location);
 
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
 
   EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
@@ -1134,7 +1151,8 @@
   GenerateOdexForTest(dex_location, odex_location);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1156,7 +1174,8 @@
   GenerateOdexForTest(dex_location, odex_location);
 
   // Load the oat using an executable oat file assistant.
-  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, true);
 
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
@@ -1184,6 +1203,45 @@
         "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
 }
 
+// Case: We have a DEX file, extract-only ODEX, and fully compiled OAT.
+// Expect: A file counts as up to date only if its compilation type is in the target mask.
+TEST_F(OatFileAssistantTest, TargetCompilationType) {
+  std::string dex_location = GetScratchDir() + "/TargetCompilationType.jar";
+  std::string odex_location = GetOdexDir() + "/TargetCompilationType.odex";
+  Copy(GetDexSrc1(), dex_location);
+  GenerateExtractOnlyOdexForTest(dex_location, odex_location);
+  GenerateOatForTest(dex_location.c_str());
+
+  OatFileAssistant ofa_full(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation, kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, ofa_full.GetDexOptNeeded());
+  EXPECT_FALSE(ofa_full.IsInBootClassPath());
+  EXPECT_TRUE(ofa_full.OdexFileIsOutOfDate());
+  EXPECT_TRUE(ofa_full.OatFileIsUpToDate());
+
+  OatFileAssistant ofa_extract(dex_location.c_str(),
+      OatFileAssistant::kExtractOnly, kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, ofa_extract.GetDexOptNeeded());
+  EXPECT_FALSE(ofa_extract.IsInBootClassPath());
+  EXPECT_TRUE(ofa_extract.OdexFileIsUpToDate());
+  EXPECT_TRUE(ofa_extract.OatFileIsOutOfDate());
+
+  OatFileAssistant ofa_profile(dex_location.c_str(),
+      OatFileAssistant::kProfileGuideCompilation, kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, ofa_profile.GetDexOptNeeded());
+  EXPECT_FALSE(ofa_profile.IsInBootClassPath());
+  EXPECT_TRUE(ofa_profile.OdexFileIsOutOfDate());
+  EXPECT_TRUE(ofa_profile.OatFileIsOutOfDate());
+
+  OatFileAssistant ofa_extract_full(dex_location.c_str(),
+      OatFileAssistant::kFullCompilation | OatFileAssistant::kExtractOnly,
+      kRuntimeISA, false);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, ofa_extract_full.GetDexOptNeeded());
+  EXPECT_FALSE(ofa_extract_full.IsInBootClassPath());
+  EXPECT_TRUE(ofa_extract_full.OdexFileIsUpToDate());
+  EXPECT_TRUE(ofa_extract_full.OatFileIsUpToDate());
+}
+
 // Verify the dexopt status values from dalvik.system.DexFile
 // match the OatFileAssistant::DexOptStatus values.
 TEST_F(OatFileAssistantTest, DexOptStatusValues) {
@@ -1218,13 +1276,31 @@
   ASSERT_FALSE(self_patchoat_needed == nullptr);
   EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get()));
+
+  ArtField* compilation_type_full = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "COMPILATION_TYPE_FULL", "I");
+  ASSERT_FALSE(compilation_type_full == nullptr);
+  EXPECT_EQ(compilation_type_full->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kFullCompilation, compilation_type_full->GetInt(dexfile.Get()));
+
+  ArtField* compilation_type_profile_guide = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "COMPILATION_TYPE_PROFILE_GUIDE", "I");
+  ASSERT_FALSE(compilation_type_profile_guide == nullptr);
+  EXPECT_EQ(compilation_type_profile_guide->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kProfileGuideCompilation,
+            compilation_type_profile_guide->GetInt(dexfile.Get()));
+
+  ArtField* compilation_type_extract_only = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "COMPILATION_TYPE_EXTRACT_ONLY", "I");
+  ASSERT_FALSE(compilation_type_extract_only == nullptr);
+  EXPECT_EQ(compilation_type_extract_only->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kExtractOnly, compilation_type_extract_only->GetInt(dexfile.Get()));
 }
 
 // TODO: More Tests:
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
 //  * Test using secondary isa
-//  * Test with profiling info?
 //  * Test for status of oat while oat is being generated (how?)
 //  * Test case where 32 and 64 bit boot class paths differ,
 //      and we ask IsInBootClassPath for a class in exactly one of the 32 or
@@ -1233,5 +1309,7 @@
 //    - Dex is stripped, don't have odex.
 //    - Oat file corrupted after status check, before reload unexecutable
 //    because it's unrelocated and no dex2oat
+//  * Test unrelocated specific target compilation type can be relocated to
+//    make it up to date.
 
 }  // namespace art
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 9ae179f..e57125b 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -307,8 +307,13 @@
   Thread* const self = Thread::Current();
   Locks::mutator_lock_->AssertNotHeld(self);
   Runtime* const runtime = Runtime::Current();
+
+  int target_compilation_type_mask = OatFileAssistant::kFullCompilation
+    | OatFileAssistant::kProfileGuideCompilation
+    | OatFileAssistant::kExtractOnly;
   OatFileAssistant oat_file_assistant(dex_location,
                                       oat_location,
+                                      target_compilation_type_mask,
                                       kRuntimeISA,
                                       !runtime->IsAotCompiler());
 
@@ -443,6 +448,10 @@
           + std::string(dex_location));
     }
   }
+
+  // TODO(calin): Consider optimizing this knowing that it is useless to record the
+  // use of fully compiled apks.
+  Runtime::Current()->NotifyDexLoaded(dex_location);
   return dex_files;
 }
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 2b7eca2..daabc6e 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -63,16 +63,24 @@
     return gc_map_offset_ == 0 && vmap_table_offset_ != 0;
   }
 
-  CodeInfo GetOptimizedCodeInfo() const {
+  const void* GetOptimizedCodeInfoPtr() const {
     DCHECK(IsOptimized());
     const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
-    return CodeInfo(data);
+    return data;
+  }
+
+  CodeInfo GetOptimizedCodeInfo() const {
+    return CodeInfo(GetOptimizedCodeInfoPtr());
   }
 
   const uint8_t* GetCode() const {
     return code_;
   }
 
+  uint32_t GetCodeSize() const {
+    return code_size_;
+  }
+
   const uint8_t* GetNativeGcMap() const {
     return (gc_map_offset_ == 0) ? nullptr : code_ - gc_map_offset_;
   }
@@ -111,7 +119,7 @@
   uint32_t GetFrameSizeInBytes() const {
     uint32_t result = frame_info_.FrameSizeInBytes();
     if (kCheckFrameSize) {
-      DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
+      DCHECK_ALIGNED(result, kStackAlignment);
     }
     return result;
   }
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index d64aa43..60403f9 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -166,6 +166,9 @@
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitosrthreshold:_")
+          .WithType<unsigned int>()
+          .IntoKey(M::JITOsrThreshold)
       .Define("-Xjitsaveprofilinginfo")
           .WithValue(true)
           .IntoKey(M::JITSaveProfilingInfo)
@@ -694,6 +697,8 @@
   UsageMessage(stream, "  -Xusejit:booleanvalue\n");
   UsageMessage(stream, "  -Xjitinitialsize:N\n");
   UsageMessage(stream, "  -Xjitmaxsize:N\n");
+  UsageMessage(stream, "  -Xjitwarmupthreshold:integervalue\n");
+  UsageMessage(stream, "  -Xjitosrthreshold:integervalue\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 9b10f2e..c7ccee2 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -744,9 +744,12 @@
     return false;
   }
   DCHECK_GE(field->GetOffset().Int32Value(), 0);
+  // Do not interleave function calls with bit field writes to placate valgrind. Bug: 27552451.
+  uint32_t field_offset = field->GetOffset().Uint32Value();
+  bool is_volatile = field->IsVolatile();
   result->field_idx = field_idx;
-  result->field_offset = field->GetOffset().Int32Value();
-  result->is_volatile = field->IsVolatile();
+  result->field_offset = field_offset;
+  result->is_volatile = is_volatile ? 1u : 0u;
   return true;
 }
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index bbb79af..e95f2c5 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -119,6 +119,7 @@
 #include "os.h"
 #include "parsed_options.h"
 #include "profiler.h"
+#include "jit/profile_saver.h"
 #include "quick/quick_method_frame_info.h"
 #include "reflection.h"
 #include "runtime_options.h"
@@ -1700,7 +1701,9 @@
 }
 
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
-                              const std::string& profile_output_filename) {
+                              const std::string& profile_output_filename,
+                              const std::string& foreign_dex_profile_path,
+                              const std::string& app_dir) {
   if (jit_.get() == nullptr) {
     // We are not JITing. Nothing to do.
     return;
@@ -1723,7 +1726,18 @@
   }
 
   profile_output_filename_ = profile_output_filename;
-  jit_->StartProfileSaver(profile_output_filename, code_paths);
+  jit_->StartProfileSaver(profile_output_filename,
+                          code_paths,
+                          foreign_dex_profile_path,
+                          app_dir);
+}
+
+void Runtime::NotifyDexLoaded(const std::string& dex_location) {
+  VLOG(profiler) << "Notify dex loaded: " << dex_location;
+  // We know that if the ProfileSaver is started then we can record profile information.
+  if (ProfileSaver::IsStarted()) {
+    ProfileSaver::NotifyDexUse(dex_location);
+  }
 }
 
 // Transaction support.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 83e77d2..8e99f80 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -467,7 +467,10 @@
   }
 
   void RegisterAppInfo(const std::vector<std::string>& code_paths,
-                       const std::string& profile_output_filename);
+                       const std::string& profile_output_filename,
+                       const std::string& foreign_dex_profile_path,
+                       const std::string& app_dir);
+  void NotifyDexLoaded(const std::string& dex_location);
 
   // Transaction support.
   bool IsActiveTransaction() const {
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 8237b06..bc963c5 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -36,6 +36,7 @@
 
 static constexpr bool kDumpHeapObjectOnSigsevg = false;
 static constexpr bool kUseSigRTTimeout = true;
+static constexpr bool kDumpNativeStackOnTimeout = true;
 
 struct Backtrace {
  public:
@@ -350,7 +351,9 @@
   if (runtime != nullptr) {
     if (IsTimeoutSignal(signal_number)) {
       // Special timeout signal. Try to dump all threads.
-      runtime->GetThreadList()->DumpForSigQuit(LOG(INTERNAL_FATAL));
+      // Note: Do not use DumpForSigQuit, as that might disable native unwind, but the native parts
+      //       are of value here.
+      runtime->GetThreadList()->Dump(LOG(INTERNAL_FATAL), kDumpNativeStackOnTimeout);
     }
     gc::Heap* heap = runtime->GetHeap();
     LOG(INTERNAL_FATAL) << "Fault message: " << runtime->GetFaultMessage();
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 838d1a9..3fd9905 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -69,7 +69,8 @@
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
 RUNTIME_OPTIONS_KEY (bool,                DumpNativeStackOnSigQuit,       true)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
-RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold)
+RUNTIME_OPTIONS_KEY (unsigned int,        JITOsrThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
 RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk
index c154eb6..5c71da6 100644
--- a/runtime/simulator/Android.mk
+++ b/runtime/simulator/Android.mk
@@ -86,7 +86,7 @@
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
   # For simulator_arm64.
   ifeq ($$(art_ndebug_or_debug),debug)
-     LOCAL_SHARED_LIBRARIES += libvixld
+     LOCAL_SHARED_LIBRARIES += libvixl
   else
      LOCAL_SHARED_LIBRARIES += libvixl
   endif
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 4c81d4f..afb11d3 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -57,6 +57,10 @@
 static constexpr useconds_t kThreadSuspendMaxYieldUs = 3000;
 static constexpr useconds_t kThreadSuspendMaxSleepUs = 5000;
 
+// Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
+// some history.
+static constexpr bool kDumpUnattachedThreadNativeStack = true;
+
 ThreadList::ThreadList()
     : suspend_all_count_(0),
       debug_suspend_all_count_(0),
@@ -138,9 +142,7 @@
   // refactor DumpState to avoid skipping analysis.
   Thread::DumpState(os, nullptr, tid);
   DumpKernelStack(os, tid, "  kernel: ", false);
-  // TODO: Reenable this when the native code in system_server can handle it.
-  // Currently "adb shell kill -3 `pid system_server`" will cause it to exit.
-  if (false) {
+  if (kDumpUnattachedThreadNativeStack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
   os << "\n";
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 13564a6..472a85c 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1120,7 +1120,8 @@
   }
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
   if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
-    os << prefix << "(backtrace::Unwind failed for thread " << tid << ")\n";
+    os << prefix << "(backtrace::Unwind failed for thread " << tid
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
     return;
   } else if (backtrace->NumFrames() == 0) {
     os << prefix << "(no native stack frames for thread " << tid << ")\n";
diff --git a/test/004-checker-UnsafeTest18/src/Main.java b/test/004-checker-UnsafeTest18/src/Main.java
index bb6de2e..bb020b9 100644
--- a/test/004-checker-UnsafeTest18/src/Main.java
+++ b/test/004-checker-UnsafeTest18/src/Main.java
@@ -87,18 +87,36 @@
 
   /// CHECK-START: void Main.load() intrinsics_recognition (after)
   /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeLoadFence
+  //
+  /// CHECK-START: void Main.load() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeLoadFence
+  //
+  /// CHECK-START: void Main.load() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:LoadAny
   private static void load() {
     unsafe.loadFence();
   }
 
   /// CHECK-START: void Main.store() intrinsics_recognition (after)
   /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeStoreFence
+  //
+  /// CHECK-START: void Main.store() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeStoreFence
+  //
+  /// CHECK-START: void Main.store() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyStore
   private static void store() {
     unsafe.storeFence();
   }
 
   /// CHECK-START: void Main.full() intrinsics_recognition (after)
   /// CHECK-DAG: InvokeVirtual intrinsic:UnsafeFullFence
+  //
+  /// CHECK-START: void Main.full() instruction_simplifier (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeFullFence
+  //
+  /// CHECK-START: void Main.full() instruction_simplifier (after)
+  /// CHECK-DAG: MemoryBarrier kind:AnyAny
   private static void full() {
     unsafe.fullFence();
   }
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index bcb697a..15683b0 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -181,6 +181,7 @@
         Class intHolder = loader.loadClass("IntHolder");
         Method loadLibrary = intHolder.getDeclaredMethod("loadLibrary", String.class);
         loadLibrary.invoke(intHolder, nativeLibraryName);
+        waitForCompilation(intHolder);
         return new WeakReference(loader);
     }
 
diff --git a/test/145-alloc-tracking-stress/src/Main.java b/test/145-alloc-tracking-stress/src/Main.java
index 752fdd9..418690a 100644
--- a/test/145-alloc-tracking-stress/src/Main.java
+++ b/test/145-alloc-tracking-stress/src/Main.java
@@ -1,5 +1,4 @@
 /*
-
  * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index 8640148..dd4ffe4 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -1601,6 +1601,34 @@
     return (short) (value & 0x17fff);
   }
 
+  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<LE:z\d+>>       LessThanOrEqual [<<Const42>>,<<Arg>>]
+
+  /// CHECK-START: int Main.intReverseCondition(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GE:z\d+>>       GreaterThanOrEqual [<<Arg>>,<<Const42>>]
+
+  public static int intReverseCondition(int i) {
+    return (42 > i) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<CMP:i\d+>>      Compare [<<Const42>>,<<Result>>]
+
+  /// CHECK-START: int Main.intReverseConditionNaN(int) instruction_simplifier (after)
+  /// CHECK-DAG:      <<Const42:d\d+>>  DoubleConstant 42
+  /// CHECK-DAG:      <<Result:d\d+>>   InvokeStaticOrDirect
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Result>>,<<Const42>>]
+
+  public static int intReverseConditionNaN(int i) {
+    return (42 != Math.sqrt(i)) ? 13 : 54;
+  }
+
   public static int runSmaliTest(String name, boolean input) {
     try {
       Class<?> c = Class.forName("SmaliTests");
@@ -1611,7 +1639,7 @@
     }
   }
 
-  public static void main(String[] args) {
+public static void main(String[] args) {
     int arg = 123456;
 
     assertLongEquals(Add0(arg), arg);
@@ -1740,6 +1768,9 @@
     assertIntEquals(intAnd0x17fffToShort(Integer.MIN_VALUE), 0);
     assertIntEquals(intAnd0x17fffToShort(Integer.MAX_VALUE), Short.MAX_VALUE);
 
+    assertIntEquals(intReverseCondition(41), 13);
+    assertIntEquals(intReverseConditionNaN(-5), 13);
+
     for (String condition : new String[] { "Equal", "NotEqual" }) {
       for (String constant : new String[] { "True", "False" }) {
         for (String side : new String[] { "Rhs", "Lhs" }) {
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
index cf9a69d..7a58e8b 100644
--- a/test/537-checker-jump-over-jump/src/Main.java
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -24,7 +24,7 @@
   //
   /// CHECK:                            If
   /// CHECK-NEXT:                       cmp
-  /// CHECK-NEXT:                       jnl/ge
+  /// CHECK-NEXT:                       jle/ng
   //
   /// CHECK-DAG:   <<Fibs:l\d+>>        StaticFieldGet
   /// CHECK-DAG:                        NullCheck [<<Fibs>>]
diff --git a/test/564-checker-negbitwise/src/Main.java b/test/564-checker-negbitwise/src/Main.java
index 3de7be7..ccb8ff4 100644
--- a/test/564-checker-negbitwise/src/Main.java
+++ b/test/564-checker-negbitwise/src/Main.java
@@ -45,7 +45,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Mask:i\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm64 (after)
@@ -55,6 +55,27 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAnd(int, int) disassembly (after)
   /// CHECK:                            bic w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        And
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAnd(int, int) disassembly (after)
+  /// CHECK:                            bic.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static int $opt$noinline$notAnd(int base, int mask) {
     if (doThrow) throw new Error();
     return base & ~mask;
@@ -74,7 +95,7 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:j\d+>>        ParameterValue
   /// CHECK:       <<Mask:j\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:j\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm64 (after)
@@ -84,6 +105,27 @@
   /// CHECK-START-ARM64: long Main.$opt$noinline$notOr(long, long) disassembly (after)
   /// CHECK:                            orn x{{\d+}}, x{{\d+}}, x{{\d+}}
 
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<Not:j\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:j\d+>>          Or [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:j\d+>>        ParameterValue
+  /// CHECK:       <<Mask:j\d+>>        ParameterValue
+  /// CHECK:       <<NegOp:j\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Or
+  /// CHECK:                            Return [<<NegOp>>]
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        Not
+  /// CHECK-NOT:                        Or
+
+  /// CHECK-START-ARM:   long Main.$opt$noinline$notOr(long, long) disassembly (after)
+  /// CHECK:                            orn.w r{{\d+}}, r{{\d+}}, r{{\d+}}
+
   public static long $opt$noinline$notOr(long base, long mask) {
     if (doThrow) throw new Error();
     return base | ~mask;
@@ -103,7 +145,7 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Mask:i\d+>>        ParameterValue
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Xor
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Base>>,<<Mask>>] kind:Xor
   /// CHECK:                            Return [<<NegOp>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm64 (after)
@@ -113,39 +155,63 @@
   /// CHECK-START-ARM64: int Main.$opt$noinline$notXor(int, int) disassembly (after)
   /// CHECK:                            eon w{{\d+}}, w{{\d+}}, w{{\d+}}
 
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op:i\d+>>          Xor [<<Base>>,<<Not>>]
+  /// CHECK:                            Return [<<Op>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notXor(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
   public static int $opt$noinline$notXor(int base, int mask) {
     if (doThrow) throw new Error();
     return base ^ ~mask;
   }
 
   /**
-   * Check that the transformation is also done when the base is a constant.
+   * Check that transformation is done when the argument is a constant.
    */
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (before)
-  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Constant:i\d+>>    IntConstant
-  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
-  /// CHECK:       <<Op:i\d+>>          Xor [<<Not>>,<<Constant>>]
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
   /// CHECK:                            Return [<<Op>>]
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (after)
-  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK-START-ARM64: int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
   /// CHECK:       <<Constant:i\d+>>    IntConstant
-  /// CHECK:       <<NegOp:i\d+>>       Arm64BitwiseNegatedRight [<<Constant>>,<<Mask>>] kind:Xor
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
   /// CHECK:                            Return [<<NegOp>>]
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Not
-  /// CHECK-NOT:                        Xor
 
-  /// CHECK-START-ARM64: int Main.$opt$noinline$notXorConstant(int) disassembly (after)
-  /// CHECK:                            mov <<Reg:w\d+>>, #0xf
-  /// CHECK:                            eon w{{\d+}}, <<Reg>>, w{{\d+}}
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Base>>]
+  /// CHECK:       <<Op:i\d+>>          And [<<Not>>,<<Constant>>]
+  /// CHECK:                            Return [<<Op>>]
 
-  public static int $opt$noinline$notXorConstant(int mask) {
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndConstant(int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Constant:i\d+>>    IntConstant
+  /// CHECK:       <<NegOp:i\d+>>       BitwiseNegatedRight [<<Constant>>,<<Base>>] kind:And
+  /// CHECK:                            Return [<<NegOp>>]
+
+  public static int $opt$noinline$notAndConstant(int mask) {
     if (doThrow) throw new Error();
-    return 0xf ^ ~mask;
+    return 0xf & ~mask;
   }
 
   /**
@@ -173,7 +239,31 @@
   /// CHECK:                            Return [<<Add>>]
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64BitwiseNegatedRight
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (before)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK:       <<Base:i\d+>>        ParameterValue
+  /// CHECK:       <<Mask:i\d+>>        ParameterValue
+  /// CHECK:       <<One:i\d+>>         IntConstant
+  /// CHECK:       <<Not:i\d+>>         Not [<<Mask>>]
+  /// CHECK:       <<Op1:i\d+>>         And [<<Not>>,<<One>>]
+  /// CHECK:       <<Op2:i\d+>>         And [<<Base>>,<<Not>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Op1>>,<<Op2>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$notAndMultipleUses(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
 
   public static int $opt$noinline$notAndMultipleUses(int base, int mask) {
     if (doThrow) throw new Error();
@@ -189,7 +279,10 @@
   // have been applied then Not/Not/Or is replaced by And/Not.
 
   /// CHECK-START-ARM64: int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm64 (after)
-  /// CHECK-NOT:                        Arm64BitwiseNegatedRight
+  /// CHECK-NOT:                        BitwiseNegatedRight
+
+  /// CHECK-START-ARM:   int Main.$opt$noinline$deMorganOr(int, int) instruction_simplifier_arm (after)
+  /// CHECK-NOT:                        BitwiseNegatedRight
 
   public static int $opt$noinline$deMorganOr(int a, int b) {
     if (doThrow) throw new Error();
@@ -200,7 +293,7 @@
     assertIntEquals(0xe,   $opt$noinline$notAnd(0xf, 0x1));
     assertLongEquals(~0x0, $opt$noinline$notOr(0xf, 0x1));
     assertIntEquals(~0xe,  $opt$noinline$notXor(0xf, 0x1));
-    assertIntEquals(~0xe,  $opt$noinline$notXorConstant(0x1));
+    assertIntEquals(0xe,  $opt$noinline$notAndConstant(0x1));
     assertIntEquals(0xe,   $opt$noinline$notAndMultipleUses(0xf, 0x1));
     assertIntEquals(~0x1,  $opt$noinline$deMorganOr(0x3, 0x1));
   }
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/577-profile-foreign-dex/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/577-profile-foreign-dex/expected.txt
diff --git a/test/577-profile-foreign-dex/info.txt b/test/577-profile-foreign-dex/info.txt
new file mode 100644
index 0000000..090db3f
--- /dev/null
+++ b/test/577-profile-foreign-dex/info.txt
@@ -0,0 +1 @@
+Check that we record the use of foreign dex files when profiles are enabled.
diff --git a/test/577-profile-foreign-dex/run b/test/577-profile-foreign-dex/run
new file mode 100644
index 0000000..ad57d14
--- /dev/null
+++ b/test/577-profile-foreign-dex/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+exec ${RUN} \
+  --runtime-option -Xjitsaveprofilinginfo \
+  --runtime-option -Xusejit:true \
+  "${@}"
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/577-profile-foreign-dex/src-ex/OtherDex.java
new file mode 100644
index 0000000..cba73b3
--- /dev/null
+++ b/test/577-profile-foreign-dex/src-ex/OtherDex.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+public class OtherDex {
+}
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
new file mode 100644
index 0000000..0cd85b5
--- /dev/null
+++ b/test/577-profile-foreign-dex/src/Main.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Constructor;
+import java.util.HashMap;
+
+public class Main {
+
+  private static final String APP_DIR_PREFIX = "app_dir_";
+  private static final String FOREIGN_DEX_PROFILE_DIR = "foreign-dex";
+  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
+  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
+
+  // Registers a fake app with the runtime, then checks which dex loads leave a marker on disk.
+  public static void main(String[] args) throws Exception {
+    File tmpFile = null;
+    File appDir = null;
+    File profileFile = null;
+    File foreignDexProfileDir = null;
+
+    try {
+      // Create the necessary files layout.
+      tmpFile = createTempFile();
+      appDir = new File(tmpFile.getParent(), APP_DIR_PREFIX + tmpFile.getName());
+      appDir.mkdir();
+      foreignDexProfileDir = new File(tmpFile.getParent(), FOREIGN_DEX_PROFILE_DIR);
+      foreignDexProfileDir.mkdir();
+      profileFile = createTempFile();
+
+      String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
+
+      // Register the app with the runtime
+      VMRuntime.registerAppInfo(profileFile.getPath(), appDir.getPath(),
+             new String[] { codePath }, foreignDexProfileDir.getPath());
+
+      testMarkerForForeignDex(foreignDexProfileDir);
+      testMarkerForCodePath(foreignDexProfileDir);
+      testMarkerForApplicationDexFile(foreignDexProfileDir, appDir);
+    } finally {
+      // File.delete() cannot remove a non-empty directory, so the markers written into
+      // foreignDexProfileDir have to be removed before the directory itself.
+      deleteWithContents(tmpFile);
+      deleteWithContents(profileFile);
+      deleteWithContents(foreignDexProfileDir);
+      deleteWithContents(appDir);
+    }
+  }
+
+  // Deletes file; if it is a directory its direct children are deleted first. Null is ignored.
+  private static void deleteWithContents(File file) {
+    if (file == null) {
+      return;
+    }
+    File[] children = file.listFiles();  // null when file is not a directory.
+    if (children != null) {
+      for (File child : children) {
+        child.delete();
+      }
+    }
+    file.delete();
+  }
+
+  // Verify we actually create a marker on disk for foreign dex files.
+  private static void testMarkerForForeignDex(File foreignDexProfileDir) throws Exception {
+    String foreignDex = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar";
+    loadDexFile(foreignDex);
+    checkMarker(foreignDexProfileDir, foreignDex, /* exists */ true);
+  }
+
+  // Verify we do not create a marker on disk for dex files part of the code path.
+  private static void testMarkerForCodePath(File foreignDexProfileDir) throws Exception {
+    String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
+    loadDexFile(codePath);
+    checkMarker(foreignDexProfileDir, codePath, /* exists */ false);
+  }
+
+  // Verify we do not create a marker for dex files loaded from the application directory.
+  private static void testMarkerForApplicationDexFile(File foreignDexProfileDir, File appDir)
+      throws Exception {
+    // Copy the -ex jar to the application directory and load it from there.
+    // This will record duplicate class conflicts but we don't care for this use case.
+    File foreignDex = new File(System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar");
+    File appDex = new File(appDir, "appDex.jar");
+    try {
+      copyFile(foreignDex, appDex);
+
+      loadDexFile(appDex.getAbsolutePath());
+      checkMarker(foreignDexProfileDir, appDex.getAbsolutePath(), /* exists */ false);
+    } finally {
+      appDex.delete();
+    }
+  }
+
+  // Throws unless the presence of the marker for dexFile matches the expectation in exists.
+  private static void checkMarker(File foreignDexProfileDir, String dexFile, boolean exists) {
+    File marker = new File(foreignDexProfileDir, dexFile.replace('/', '@'));
+    if (marker.exists() != exists) {
+      throw new RuntimeException("Marker test failed for:" + marker.getPath());
+    }
+  }
+
+  // Loads dexFile through a new dalvik.system.PathClassLoader, looked up reflectively.
+  private static void loadDexFile(String dexFile) throws Exception {
+    // Class.forName throws instead of returning null, so no null check is needed.
+    Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
+    Constructor constructor =
+        pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
+    constructor.newInstance(dexFile, ClassLoader.getSystemClassLoader());
+  }
+
+  // Reflective wrapper around dalvik.system.VMRuntime.registerAppInfo.
+  private static class VMRuntime {
+    private static final Method registerAppInfoMethod;
+    static {
+      try {
+        Class c = Class.forName("dalvik.system.VMRuntime");
+        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
+            String.class, String.class, String[].class, String.class);
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void registerAppInfo(String profileFile, String appDir,
+        String[] codePath, String foreignDexProfileDir) throws Exception {
+      registerAppInfoMethod.invoke(null, profileFile, appDir, codePath, foreignDexProfileDir);
+    }
+  }
+
+  // Copies fromFile to toFile; both streams are closed even when the copy itself fails.
+  private static void copyFile(File fromFile, File toFile) throws Exception {
+    FileInputStream in = new FileInputStream(fromFile);
+    try {
+      FileOutputStream out = new FileOutputStream(toFile);
+      try {
+        byte[] buffer = new byte[4096];
+        int bytesRead;
+        while ((bytesRead = in.read(buffer)) >= 0) {
+          out.write(buffer, 0, bytesRead);
+        }
+        out.flush();
+        try {
+          out.getFD().sync();
+        } catch (IOException e) {
+          // Best effort only: a failed sync is deliberately ignored.
+        }
+      } finally {
+        out.close();
+      }
+    } finally {
+      in.close();
+    }
+  }
+
+  // Creates a temp file, retrying with /data/local/tmp and then /sdcard as java.io.tmpdir.
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+}
diff --git a/test/580-checker-round/expected.txt b/test/580-checker-round/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/580-checker-round/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/580-checker-round/info.txt b/test/580-checker-round/info.txt
new file mode 100644
index 0000000..d6397fd
--- /dev/null
+++ b/test/580-checker-round/info.txt
@@ -0,0 +1 @@
+Unit test for float/double rounding.
diff --git a/test/580-checker-round/src/Main.java b/test/580-checker-round/src/Main.java
new file mode 100644
index 0000000..9e248ef
--- /dev/null
+++ b/test/580-checker-round/src/Main.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Verifies Math.round(float) and Math.round(double) results, including edge cases.
+  /// CHECK-START: int Main.round32(float) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect intrinsic:MathRoundFloat
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static int round32(float f) {
+    return Math.round(f);
+  }
+
+  /// CHECK-START: long Main.round64(double) intrinsics_recognition (after)
+  /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect intrinsic:MathRoundDouble
+  /// CHECK-DAG:                 Return [<<Result>>]
+  private static long round64(double d) {
+    return Math.round(d);
+  }
+
+  public static void main(String[] args) {
+    // Float: a few obvious numbers; the infinities are expected at the int extremes.
+    expectEquals32(-2147483648, round32(Float.NEGATIVE_INFINITY));
+    expectEquals32(-2, round32(-1.51f));
+    expectEquals32(-1, round32(-1.2f));
+    expectEquals32(-1, round32(-1.0f));
+    expectEquals32(-1, round32(-0.51f));
+    expectEquals32(0, round32(-0.2f));
+    expectEquals32(0, round32(-0.0f));
+    expectEquals32(0, round32(+0.0f));
+    expectEquals32(0, round32(+0.2f));
+    expectEquals32(1, round32(+0.5f));
+    expectEquals32(1, round32(+1.0f));
+    expectEquals32(1, round32(+1.2f));
+    expectEquals32(2, round32(+1.5f));
+    expectEquals32(2147483647, round32(Float.POSITIVE_INFINITY));
+
+    // Float: just below, exactly at, and half above every integer in [-100, 100].
+    for (int i = -100; i <= 100; ++i) {
+      expectEquals32(i - 1, round32((float) i - 0.51f));
+      expectEquals32(i, round32((float) i));
+      expectEquals32(i + 1, round32((float) i + 0.5f));
+    }
+    for (float f = -1.5f; f <= -1.499f; f = Math.nextAfter(f, Float.POSITIVE_INFINITY)) {
+      expectEquals32(-1, round32(f));
+    }
+
+    // Float: harder values, checked pairwise against ivals below.
+    float[] fvals = {
+      -16777215.5f,
+      -16777215.0f,
+      -0.4999f,
+      0.4999f,
+      16777215.0f,
+      16777215.5f
+    };
+    int[] ivals = {
+      -16777216,
+      -16777215,
+      0,
+      0,
+      16777215,
+      16777216
+    };
+    for (int i = 0; i < fvals.length; i++) {
+      expectEquals32(ivals[i], round32(fvals[i]));
+    }
+
+    // Float: a few NaN bit patterns; each is expected to round to 0.
+    float[] fnans = {
+      Float.intBitsToFloat(0x7f800001),
+      Float.intBitsToFloat(0x7fa00000),
+      Float.intBitsToFloat(0x7fc00000),
+      Float.intBitsToFloat(0x7fffffff),
+      Float.intBitsToFloat(0xff800001),
+      Float.intBitsToFloat(0xffa00000),
+      Float.intBitsToFloat(0xffc00000),
+      Float.intBitsToFloat(0xffffffff)
+    };
+    for (int i = 0; i < fnans.length; i++) {
+      expectEquals32(0, round32(fnans[i]));
+    }
+
+    // Double: a few obvious numbers; the infinities are expected at the long extremes.
+    expectEquals64(-9223372036854775808L, round64(Double.NEGATIVE_INFINITY));
+    expectEquals64(-2L, round64(-1.51d));
+    expectEquals64(-1L, round64(-1.2d));
+    expectEquals64(-1L, round64(-1.0d));
+    expectEquals64(-1L, round64(-0.51d));
+    expectEquals64(0L, round64(-0.2d));
+    expectEquals64(0L, round64(-0.0d));
+    expectEquals64(0L, round64(+0.0d));
+    expectEquals64(0L, round64(+0.2d));
+    expectEquals64(1L, round64(+0.5d));
+    expectEquals64(1L, round64(+1.0d));
+    expectEquals64(1L, round64(+1.2d));
+    expectEquals64(2L, round64(+1.5d));
+    expectEquals64(9223372036854775807L, round64(Double.POSITIVE_INFINITY));
+
+    // Double: just below, exactly at, and half above every integer in [-100, 100].
+    for (long l = -100; l <= 100; ++l) {
+      expectEquals64(l - 1, round64((double) l - 0.51d));
+      expectEquals64(l, round64((double) l));
+      expectEquals64(l + 1, round64((double) l + 0.5d));
+    }
+    for (double d = -1.5d; d <= -1.49999999999d; d = Math.nextAfter(d, Double.POSITIVE_INFINITY)) {
+      expectEquals64(-1L, round64(d));
+    }
+
+    // Double: harder values, checked pairwise against lvals below.
+    double[] dvals = {
+      -9007199254740991.5d,
+      -9007199254740991.0d,
+      -0.49999999999999994d,
+      0.49999999999999994d,
+      9007199254740991.0d,
+      9007199254740991.5d
+    };
+    long[] lvals = {
+      -9007199254740992L,
+      -9007199254740991L,
+      0L,
+      0L,
+      9007199254740991L,
+      9007199254740992L
+    };
+    for (int i = 0; i < dvals.length; i++) {
+      expectEquals64(lvals[i], round64(dvals[i]));
+    }
+
+    // Double: a few NaN bit patterns; each is expected to round to 0.
+    double[] dnans = {
+      Double.longBitsToDouble(0x7ff0000000000001L),
+      Double.longBitsToDouble(0x7ff4000000000000L),
+      Double.longBitsToDouble(0x7ff8000000000000L),
+      Double.longBitsToDouble(0x7fffffffffffffffL),
+      Double.longBitsToDouble(0xfff0000000000001L),
+      Double.longBitsToDouble(0xfff4000000000000L),
+      Double.longBitsToDouble(0xfff8000000000000L),
+      Double.longBitsToDouble(0xffffffffffffffffL)
+    };
+    for (int i = 0; i < dnans.length; i++) {
+      expectEquals64(0L, round64(dnans[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/580-checker-string-factory-intrinsics/expected.txt b/test/580-checker-string-factory-intrinsics/expected.txt
new file mode 100644
index 0000000..86e041d
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/expected.txt
@@ -0,0 +1,3 @@
+foo
+bar
+baz
diff --git a/test/580-checker-string-factory-intrinsics/info.txt b/test/580-checker-string-factory-intrinsics/info.txt
new file mode 100644
index 0000000..3d01a19
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/info.txt
@@ -0,0 +1 @@
+Ensure java.lang.StringFactory intrinsics are recognized and used.
diff --git a/test/580-checker-string-factory-intrinsics/src/Main.java b/test/580-checker-string-factory-intrinsics/src/Main.java
new file mode 100644
index 0000000..a2e34bf
--- /dev/null
+++ b/test/580-checker-string-factory-intrinsics/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: void Main.testNewStringFromBytes() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromBytes intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromBytes() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromBytes intrinsic:StringNewStringFromBytes
+
+  public static void testNewStringFromBytes() {
+    byte[] bytes = { 'f', 'o', 'o' };
+    String s = StringFactory.newStringFromBytes(bytes, 0, 0, 3);
+    System.out.println(s);
+  }
+
+  // The (native) method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // is recognized as intrinsic StringNewStringFromChars.  However, because
+  // this method is not public, we cannot call it from here and use Checker
+  // to verify that the compiler actually intrinsifies it (as we do for the
+  // StringNewStringFromBytes and StringNewStringFromString intrinsics).
+  //
+  // We can call a public method such as
+  //
+  //   java.lang.StringFactory.newStringFromChars(char[] data)
+  //
+  // which contains a call to the former (non-public) native method.
+  // However, that call will not be inlined (the callee lives in another Dex
+  // file and itself contains a call, which needs an environment), so Checker
+  // cannot confirm that the native call was intrinsified either.  The checks
+  // below therefore only assert that the call to the public method is still
+  // reported as intrinsic:None after each listed pass.
+
+  /// CHECK-START: void Main.testNewStringFromChars() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromChars() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromChars() inliner (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromChars intrinsic:None
+
+  public static void testNewStringFromChars() {
+    char[] chars = { 'b', 'a', 'r' };
+    String s = StringFactory.newStringFromChars(chars);
+    System.out.println(s);
+  }
+
+  /// CHECK-START: void Main.testNewStringFromString() builder (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromString intrinsic:None
+
+  /// CHECK-START: void Main.testNewStringFromString() intrinsics_recognition (after)
+  /// CHECK-DAG:     InvokeStaticOrDirect method_name:java.lang.StringFactory.newStringFromString intrinsic:StringNewStringFromString
+
+  public static void testNewStringFromString() {
+    String s1 = "baz";
+    String s2 = StringFactory.newStringFromString(s1);
+    System.out.println(s2);
+  }
+
+  public static void main(String[] args) throws Exception {
+    testNewStringFromBytes();
+    testNewStringFromChars();
+    testNewStringFromString();
+  }
+}
diff --git a/test/581-rtp/expected.txt b/test/581-rtp/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/581-rtp/expected.txt
diff --git a/test/581-rtp/info.txt b/test/581-rtp/info.txt
new file mode 100644
index 0000000..b57449a
--- /dev/null
+++ b/test/581-rtp/info.txt
@@ -0,0 +1,2 @@
+Regression test for the reference type propagation pass
+of the optimizing compiler that used to break invariants.
diff --git a/test/581-rtp/src/Main.java b/test/581-rtp/src/Main.java
new file mode 100644
index 0000000..09f6f6c
--- /dev/null
+++ b/test/581-rtp/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public final class Main {
+
+  /// CHECK-START: void Main.main(String[]) builder (after)
+  /// CHECK: StaticFieldGet klass:Main[] exact: true
+  /// CHECK: ArrayGet klass:Main exact:true
+  /// CHECK: BoundType klass:Main exact:true
+  public static void main(String[] args) {
+    Object o = null;
+    Main f = a[0];
+    for (int i = 0; i < 2; ++i) {
+      // We used to crash in the fixed point iteration of
+      // the reference type propagation while handling the instanceof:
+      // we were expecting `o` to get the same exact-ness as the
+      // `HBoundType` but the typing of the `ArrayGet` used to not
+      // propagate the exact-ness.
+      if (o instanceof Main) {
+        field = o;
+      }
+      o = f;
+    }
+    if (field != null) {
+      throw new Error("Expected null");
+    }
+  }
+
+  static Main[] a = new Main[1];  // One slot, never assigned here, so a[0] stays null.
+  static Object field;  // Only written if the instanceof check in main() succeeds.
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c4f0171..7036bdc 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -567,7 +567,9 @@
   537-checker-arraycopy
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
-TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
+# 145: Test sometimes times out in read barrier configuration (b/27467554).
+TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := \
+  145-alloc-tracking-stress
 
 ifeq ($(ART_USE_READ_BARRIER),true)
   ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
diff --git a/test/valgrind-suppressions.txt b/test/valgrind-suppressions.txt
new file mode 100644
index 0000000..acab6e5
--- /dev/null
+++ b/test/valgrind-suppressions.txt
@@ -0,0 +1,15 @@
+{
+   b/27596582
+   Memcheck:Cond
+   fun:index
+   fun:expand_dynamic_string_token
+   fun:_dl_map_object
+   fun:map_doit
+   fun:_dl_catch_error
+   fun:do_preload
+   fun:dl_main
+   fun:_dl_sysdep_start
+   fun:_dl_start_final
+   fun:_dl_start
+   obj:/lib/x86_64-linux-gnu/ld-*.so
+}
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index fab4599..46100ae 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -270,10 +270,5 @@
   description: "Only work with --mode=activity",
   result: EXEC_FAILED,
   names: [ "libcore.java.io.FileTest#testJavaIoTmpdirMutable" ]
-},
-{
-  description: "Temporary suppressing while test is fixed",
-  result: EXEC_FAILED,
-  names: [ "org.apache.harmony.tests.java.util.ArrayDequeTest#test_forEachRemaining_iterator" ]
 }
 ]