Merge "ART: Get rid of most of java.lang.DexCache."
diff --git a/Android.mk b/Android.mk
index 0e86188..f8c5378 100644
--- a/Android.mk
+++ b/Android.mk
@@ -388,6 +388,7 @@
 # libstdc++ is needed when building for ART_TARGET_LINUX.
 ART_TARGET_SHARED_LIBRARY_BENCHMARK := $(TARGET_OUT_SHARED_LIBRARIES)/libartbenchmark.so
 build-art-target-golem: dex2oat dalvikvm patchoat linker libstdc++ \
+                        $(TARGET_OUT_EXECUTABLES)/art \
                         $(TARGET_OUT)/etc/public.libraries.txt \
                         $(ART_TARGET_DEX_DEPENDENCIES) \
                         $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES) \
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 27ec8b3..1876efc 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -215,6 +215,7 @@
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
+      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
@@ -230,6 +231,7 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
+      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
diff --git a/build/art.go b/build/art.go
index e7f7e21..053968d 100644
--- a/build/art.go
+++ b/build/art.go
@@ -70,10 +70,10 @@
 			"-DART_READ_BARRIER_TYPE_IS_"+barrierType+"=1")
 	}
 
-	if envTrue(ctx, "ART_USE_VIXL_ARM_BACKEND") {
-		// Used to enable the new VIXL-based ARM code generator.
-		cflags = append(cflags, "-DART_USE_VIXL_ARM_BACKEND=1")
-		asflags = append(asflags, "-DART_USE_VIXL_ARM_BACKEND=1")
+	if envTrue(ctx, "ART_USE_OLD_ARM_BACKEND") {
+		// Used to enable the old, pre-VIXL ARM code generator.
+		cflags = append(cflags, "-DART_USE_OLD_ARM_BACKEND=1")
+		asflags = append(asflags, "-DART_USE_OLD_ARM_BACKEND=1")
 	}
 
 	return cflags, asflags
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 9a45379..8b30292 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -55,11 +55,17 @@
   // If the code size is 0 it means the method was skipped due to profile guided compilation.
   if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) {
     ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
-    uint32_t code_size = code.size();
+    const uint32_t code_size = code.size();
     ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
-    uint32_t vmap_table_offset = vmap_table.empty() ? 0u
+    const uint32_t vmap_table_offset = vmap_table.empty() ? 0u
         : sizeof(OatQuickMethodHeader) + vmap_table.size();
+    // The method info is directly before the vmap table.
+    ArrayRef<const uint8_t> method_info = compiled_method->GetMethodInfo();
+    const uint32_t method_info_offset = method_info.empty() ? 0u
+        : vmap_table_offset + method_info.size();
+
     OatQuickMethodHeader method_header(vmap_table_offset,
+                                       method_info_offset,
                                        compiled_method->GetFrameSizeInBytes(),
                                        compiled_method->GetCoreSpillMask(),
                                        compiled_method->GetFpSpillMask(),
@@ -68,11 +74,12 @@
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
     const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
-    const size_t size = vmap_table.size() + sizeof(method_header) + code_size;
+    const size_t size = method_info.size() + vmap_table.size() + sizeof(method_header) + code_size;
     chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
     chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
+    chunk->insert(chunk->begin(), method_info.begin(), method_info.end());
     chunk->insert(chunk->end(), code.begin(), code.end());
     CHECK_EQ(chunk->size(), size);
     const void* unaligned_code_ptr = chunk->data() + (size - code_size);
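
A minimal layout sketch may help here: the chunk assembled above ends up as [method_info | vmap_table | OatQuickMethodHeader | code], with both header offsets counted backwards from the first code byte. The struct and helpers below are hypothetical stand-ins, not the real OatQuickMethodHeader API:

```cpp
#include <cstdint>

// Hypothetical stand-in for the two offsets stored in the method header.
struct FakeMethodHeader {
  uint32_t vmap_table_offset_;   // sizeof(header) + vmap_table.size()
  uint32_t method_info_offset_;  // vmap_table_offset_ + method_info.size()
};

// Both tables are found by walking backwards from the code start.
const uint8_t* VmapTableOf(const uint8_t* code, const FakeMethodHeader& h) {
  return h.vmap_table_offset_ == 0u ? nullptr : code - h.vmap_table_offset_;
}
const uint8_t* MethodInfoOf(const uint8_t* code, const FakeMethodHeader& h) {
  return h.method_info_offset_ == 0u ? nullptr : code - h.method_info_offset_;
}
```
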
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index f06d90c..0d9021f 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -105,15 +105,15 @@
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
-                               const ArrayRef<const SrcMapElem>& src_mapping_table,
+                               const ArrayRef<const uint8_t>& method_info,
                                const ArrayRef<const uint8_t>& vmap_table,
                                const ArrayRef<const uint8_t>& cfi_info,
                                const ArrayRef<const LinkerPatch>& patches)
     : CompiledCode(driver, instruction_set, quick_code),
-      frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
+      frame_size_in_bytes_(frame_size_in_bytes),
+      core_spill_mask_(core_spill_mask),
       fp_spill_mask_(fp_spill_mask),
-      src_mapping_table_(
-          driver->GetCompiledMethodStorage()->DeduplicateSrcMappingTable(src_mapping_table)),
+      method_info_(driver->GetCompiledMethodStorage()->DeduplicateMethodInfo(method_info)),
       vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)),
       cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)),
       patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) {
@@ -126,7 +126,7 @@
     const size_t frame_size_in_bytes,
     const uint32_t core_spill_mask,
     const uint32_t fp_spill_mask,
-    const ArrayRef<const SrcMapElem>& src_mapping_table,
+    const ArrayRef<const uint8_t>& method_info,
     const ArrayRef<const uint8_t>& vmap_table,
     const ArrayRef<const uint8_t>& cfi_info,
     const ArrayRef<const LinkerPatch>& patches) {
@@ -139,7 +139,7 @@
                   frame_size_in_bytes,
                   core_spill_mask,
                   fp_spill_mask,
-                  src_mapping_table,
+                  method_info,
                   vmap_table,
                   cfi_info, patches);
   return ret;
@@ -156,7 +156,7 @@
   storage->ReleaseLinkerPatches(patches_);
   storage->ReleaseCFIInfo(cfi_info_);
   storage->ReleaseVMapTable(vmap_table_);
-  storage->ReleaseSrcMappingTable(src_mapping_table_);
+  storage->ReleaseMethodInfo(method_info_);
 }
 
 }  // namespace art
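
As a usage sketch of the storage contract implied above (assuming dedupe is enabled and ART's headers are available; only calls visible in this change are used), the storage hands out shared length-prefixed arrays that must be released symmetrically, as `~CompiledMethod` does:

```cpp
// Illustrative only: equal contents deduplicate to the same pointer, and every
// Deduplicate* call is balanced by a Release* call.
#include "driver/compiled_method_storage.h"  // assumed ART include path

void MethodInfoRoundTrip(art::CompiledMethodStorage* storage,
                         art::ArrayRef<const uint8_t> method_info) {
  const art::LengthPrefixedArray<uint8_t>* a =
      storage->DeduplicateMethodInfo(method_info);
  const art::LengthPrefixedArray<uint8_t>* b =
      storage->DeduplicateMethodInfo(method_info);
  CHECK_EQ(a, b);  // pointer identity is the dedupe contract the tests rely on
  storage->ReleaseMethodInfo(a);
  storage->ReleaseMethodInfo(b);
}
```
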
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 00e2d62..aa529f8 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -109,57 +109,6 @@
   return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
 }
 
-template <class Allocator>
-class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> {
- public:
-  using std::vector<SrcMapElem, Allocator>::begin;
-  using typename std::vector<SrcMapElem, Allocator>::const_iterator;
-  using std::vector<SrcMapElem, Allocator>::empty;
-  using std::vector<SrcMapElem, Allocator>::end;
-  using std::vector<SrcMapElem, Allocator>::resize;
-  using std::vector<SrcMapElem, Allocator>::shrink_to_fit;
-  using std::vector<SrcMapElem, Allocator>::size;
-
-  explicit SrcMap() {}
-  explicit SrcMap(const Allocator& alloc) : std::vector<SrcMapElem, Allocator>(alloc) {}
-
-  template <class InputIt>
-  SrcMap(InputIt first, InputIt last, const Allocator& alloc)
-      : std::vector<SrcMapElem, Allocator>(first, last, alloc) {}
-
-  void push_back(const SrcMapElem& elem) {
-    if (!empty()) {
-      // Check that the addresses are inserted in sorted order.
-      DCHECK_GE(elem.from_, this->back().from_);
-      // If two consequitive entries map to the same value, ignore the later.
-      // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1.
-      if (elem.to_ == this->back().to_) {
-        return;
-      }
-    }
-    std::vector<SrcMapElem, Allocator>::push_back(elem);
-  }
-
-  // Returns true and the corresponding "to" value if the mapping is found.
-  // Oterwise returns false and 0.
-  std::pair<bool, int32_t> Find(uint32_t from) const {
-    // Finds first mapping such that lb.from_ >= from.
-    auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN});
-    if (lb != end() && lb->from_ == from) {
-      // Found exact match.
-      return std::make_pair(true, lb->to_);
-    } else if (lb != begin()) {
-      // The previous mapping is still in effect.
-      return std::make_pair(true, (--lb)->to_);
-    } else {
-      // Not found because 'from' is smaller than first entry in the map.
-      return std::make_pair(false, 0);
-    }
-  }
-};
-
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
-
 class LinkerPatch {
  public:
   // Note: We explicitly specify the underlying type of the enum because GCC
@@ -420,7 +369,7 @@
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
-                 const ArrayRef<const SrcMapElem>& src_mapping_table,
+                 const ArrayRef<const uint8_t>& method_info,
                  const ArrayRef<const uint8_t>& vmap_table,
                  const ArrayRef<const uint8_t>& cfi_info,
                  const ArrayRef<const LinkerPatch>& patches);
@@ -434,7 +383,7 @@
       const size_t frame_size_in_bytes,
       const uint32_t core_spill_mask,
       const uint32_t fp_spill_mask,
-      const ArrayRef<const SrcMapElem>& src_mapping_table,
+      const ArrayRef<const uint8_t>& method_info,
       const ArrayRef<const uint8_t>& vmap_table,
       const ArrayRef<const uint8_t>& cfi_info,
       const ArrayRef<const LinkerPatch>& patches);
@@ -453,8 +402,8 @@
     return fp_spill_mask_;
   }
 
-  ArrayRef<const SrcMapElem> GetSrcMappingTable() const {
-    return GetArray(src_mapping_table_);
+  ArrayRef<const uint8_t> GetMethodInfo() const {
+    return GetArray(method_info_);
   }
 
   ArrayRef<const uint8_t> GetVmapTable() const {
@@ -476,9 +425,9 @@
   const uint32_t core_spill_mask_;
   // For quick code, a bit mask describing spilled FPR callee-save registers.
   const uint32_t fp_spill_mask_;
-  // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
-  const LengthPrefixedArray<SrcMapElem>* const src_mapping_table_;
-  // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed.
+  // For quick code, method-specific information that is not very dedupe-friendly (method indices).
+  const LengthPrefixedArray<uint8_t>* const method_info_;
+  // For quick code, holds the code info, which contains stack maps, inline information, etc.
   const LengthPrefixedArray<uint8_t>* const vmap_table_;
   // For quick code, a FDE entry for the debug_frame section.
   const LengthPrefixedArray<uint8_t>* const cfi_info_;
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 76aeaa5..808e28c 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -370,7 +370,7 @@
         0,
         0,
         0,
-        ArrayRef<const SrcMapElem>(),                // src_mapping_table
+        ArrayRef<const uint8_t>(),                   // method_info
         ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
         ArrayRef<const uint8_t>(),                   // cfi data
         ArrayRef<const LinkerPatch>());
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index a0a8f81..e6a47ba 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -172,8 +172,8 @@
     : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
       dedupe_enabled_(true),
       dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
-      dedupe_src_mapping_table_("dedupe source mapping table",
-                                LengthPrefixedArrayAlloc<SrcMapElem>(swap_space_.get())),
+      dedupe_method_info_("dedupe method info",
+                          LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_vmap_table_("dedupe vmap table",
                          LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
@@ -207,13 +207,13 @@
   ReleaseArrayIfNotDeduplicated(code);
 }
 
-const LengthPrefixedArray<SrcMapElem>* CompiledMethodStorage::DeduplicateSrcMappingTable(
-    const ArrayRef<const SrcMapElem>& src_map) {
-  return AllocateOrDeduplicateArray(src_map, &dedupe_src_mapping_table_);
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateMethodInfo(
+    const ArrayRef<const uint8_t>& src_map) {
+  return AllocateOrDeduplicateArray(src_map, &dedupe_method_info_);
 }
 
-void CompiledMethodStorage::ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map) {
-  ReleaseArrayIfNotDeduplicated(src_map);
+void CompiledMethodStorage::ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info) {
+  ReleaseArrayIfNotDeduplicated(method_info);
 }
 
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index 124b5a6..27011e8 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -29,7 +29,6 @@
 namespace art {
 
 class LinkerPatch;
-class SrcMapElem;
 
 class CompiledMethodStorage {
  public:
@@ -52,9 +51,9 @@
   const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
   void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
 
-  const LengthPrefixedArray<SrcMapElem>* DeduplicateSrcMappingTable(
-      const ArrayRef<const SrcMapElem>& src_map);
-  void ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map);
+  const LengthPrefixedArray<uint8_t>* DeduplicateMethodInfo(
+      const ArrayRef<const uint8_t>& method_info);
+  void ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info);
 
   const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
   void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
@@ -96,7 +95,7 @@
   bool dedupe_enabled_;
 
   ArrayDedupeSet<uint8_t> dedupe_code_;
-  ArrayDedupeSet<SrcMapElem> dedupe_src_mapping_table_;
+  ArrayDedupeSet<uint8_t> dedupe_method_info_;
   ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
   ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
   ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index b72d0ac..6572d17 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -51,11 +51,11 @@
       ArrayRef<const uint8_t>(raw_code1),
       ArrayRef<const uint8_t>(raw_code2),
   };
-  const SrcMapElem raw_src_map1[] = { { 1u, 2u }, { 3u, 4u }, { 5u, 6u } };
-  const SrcMapElem raw_src_map2[] = { { 8u, 7u }, { 6u, 5u }, { 4u, 3u }, { 2u, 1u } };
-  ArrayRef<const SrcMapElem> src_map[] = {
-      ArrayRef<const SrcMapElem>(raw_src_map1),
-      ArrayRef<const SrcMapElem>(raw_src_map2),
+  const uint8_t raw_method_info_map1[] = { 1u, 2u, 3u, 4u, 5u, 6u };
+  const uint8_t raw_method_info_map2[] = { 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u };
+  ArrayRef<const uint8_t> method_info[] = {
+      ArrayRef<const uint8_t>(raw_method_info_map1),
+      ArrayRef<const uint8_t>(raw_method_info_map2),
   };
   const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
   const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
@@ -85,7 +85,7 @@
   std::vector<CompiledMethod*> compiled_methods;
   compiled_methods.reserve(1u << 7);
   for (auto&& c : code) {
-    for (auto&& s : src_map) {
+    for (auto&& s : method_info) {
       for (auto&& v : vmap_table) {
         for (auto&& f : cfi_info) {
           for (auto&& p : patches) {
@@ -113,7 +113,7 @@
       bool same_patches = ((i ^ j) & patches_bit) == 0u;
       ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
           << i << " " << j;
-      ASSERT_EQ(same_src_map, lhs->GetSrcMappingTable().data() == rhs->GetSrcMappingTable().data())
+      ASSERT_EQ(same_src_map, lhs->GetMethodInfo().data() == rhs->GetMethodInfo().data())
           << i << " " << j;
       ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
           << i << " " << j;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index cbde587..874e357 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -65,8 +65,6 @@
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
-class SrcMapElem;
-using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
 template<class T> class Handle;
 class TimingLogger;
 class VdexFile;
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index eac46e5..c975944 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -74,8 +74,8 @@
 
     fake_header_code_and_maps_.resize(stack_maps_offset + fake_code_.size());
     MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size);
-    stack_maps.FillIn(stack_maps_region);
-    OatQuickMethodHeader method_header(stack_maps_offset, 4 * sizeof(void*), 0u, 0u, code_size);
+    stack_maps.FillInCodeInfo(stack_maps_region);
+    OatQuickMethodHeader method_header(stack_maps_offset, 0u, 4 * sizeof(void*), 0u, 0u, code_size);
     memcpy(&fake_header_code_and_maps_[stack_maps_size], &method_header, sizeof(method_header));
     std::copy(fake_code_.begin(),
               fake_code_.end(),
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 3bd290d..68ec7bd 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -660,8 +660,8 @@
                                                  frame_size,
                                                  main_jni_conv->CoreSpillMask(),
                                                  main_jni_conv->FpSpillMask(),
-                                                 ArrayRef<const SrcMapElem>(),
-                                                 ArrayRef<const uint8_t>(),  // vmap_table.
+                                                 /* method_info */ ArrayRef<const uint8_t>(),
+                                                 /* vmap_table */ ArrayRef<const uint8_t>(),
                                                  ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
                                                  ArrayRef<const LinkerPatch>());
 }
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 233daf4..908cb41 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -87,7 +87,7 @@
         /* frame_size_in_bytes */ 0u,
         /* core_spill_mask */ 0u,
         /* fp_spill_mask */ 0u,
-        /* src_mapping_table */ ArrayRef<const SrcMapElem>(),
+        /* method_info */ ArrayRef<const uint8_t>(),
         /* vmap_table */ ArrayRef<const uint8_t>(),
         /* cfi_info */ ArrayRef<const uint8_t>(),
         patches));
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 97b1374..ead4124 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -485,7 +485,7 @@
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
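
The expected size grows from 20 to 24 bytes because the header gains a single uint32_t for the method info offset. A sketch of the assumed field layout follows; the names are inferred from the constructor calls in this change, not copied from the real declaration:

```cpp
#include <cstdint>

struct SketchOatQuickMethodHeader {
  uint32_t vmap_table_offset_;
  uint32_t method_info_offset_;   // the new field; +4 bytes
  uint32_t frame_size_in_bytes_;  // the next three form the frame info
  uint32_t core_spill_mask_;
  uint32_t fp_spill_mask_;
  uint32_t code_size_;
};
static_assert(sizeof(SketchOatQuickMethodHeader) == 24u, "was 20 before");
```
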
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index afcdf5e..5406ae7 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -326,6 +326,7 @@
     size_relative_call_thunks_(0),
     size_misc_thunks_(0),
     size_vmap_table_(0),
+    size_method_info_(0),
     size_oat_dex_file_location_size_(0),
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
@@ -809,6 +810,7 @@
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
       uint32_t vmap_table_offset = method_header->GetVmapTableOffset();
+      uint32_t method_info_offset = method_header->GetMethodInfoOffset();
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -819,13 +821,18 @@
           vmap_table_offset += code_offset;
           DCHECK_LT(vmap_table_offset, code_offset);
         }
+        if (method_info_offset != 0u) {
+          method_info_offset += code_offset;
+          DCHECK_LT(method_info_offset, code_offset);
+        }
       } else {
+        CHECK(compiled_method->GetMethodInfo().empty());
         if (kIsVdexEnabled) {
           // We write the offset in the .vdex file.
           DCHECK_EQ(vmap_table_offset, 0u);
           vmap_table_offset = current_quickening_info_offset_;
-          ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
-          current_quickening_info_offset_ += map.size() * sizeof(map.front());
+          ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
+          current_quickening_info_offset_ += vmap_table.size() * sizeof(vmap_table.front());
         } else {
           // We write the offset of the quickening info relative to the code.
           vmap_table_offset += code_offset;
@@ -836,6 +843,7 @@
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
       uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
       *method_header = OatQuickMethodHeader(vmap_table_offset,
+                                            method_info_offset,
                                             frame_size_in_bytes,
                                             core_spill_mask,
                                             fp_spill_mask,
@@ -909,6 +917,9 @@
       if (UNLIKELY(lhs->GetVmapTable().data() != rhs->GetVmapTable().data())) {
         return lhs->GetVmapTable().data() < rhs->GetVmapTable().data();
       }
+      if (UNLIKELY(lhs->GetMethodInfo().data() != rhs->GetMethodInfo().data())) {
+        return lhs->GetMethodInfo().data() < rhs->GetMethodInfo().data();
+      }
       if (UNLIKELY(lhs->GetPatches().data() != rhs->GetPatches().data())) {
         return lhs->GetPatches().data() < rhs->GetPatches().data();
       }
@@ -983,6 +994,44 @@
   SafeMap<const uint8_t*, uint32_t> dedupe_map_;
 };
 
+class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor {
+ public:
+  InitMethodInfoVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset) {}
+
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
+    CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
+
+    if (compiled_method != nullptr) {
+      DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
+      DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetMethodInfoOffset(), 0u);
+      ArrayRef<const uint8_t> map = compiled_method->GetMethodInfo();
+      const uint32_t map_size = map.size() * sizeof(map[0]);
+      if (map_size != 0u) {
+        size_t offset = dedupe_map_.GetOrCreate(
+            map.data(),
+            [this, map_size]() {
+              uint32_t new_offset = offset_;
+              offset_ += map_size;
+              return new_offset;
+            });
+        // Code offset is not initialized yet, so set the map offset to 0u-offset.
+        DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
+        oat_class->method_headers_[method_offsets_index_].SetMethodInfoOffset(0u - offset);
+      }
+      ++method_offsets_index_;
+    }
+
+    return true;
+  }
+
+ private:
+  // Deduplication is already done on a pointer basis by the compiler driver,
+  // so we can simply compare the pointers to find out if things are duplicated.
+  SafeMap<const uint8_t*, uint32_t> dedupe_map_;
+};
+
 class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
  public:
   InitImageMethodVisitor(OatWriter* writer, size_t offset)
@@ -1434,7 +1483,7 @@
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    if (compiled_method != nullptr) {  // ie. not an abstract method
+    if (compiled_method != nullptr) {  // i.e. not an abstract method
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
@@ -1483,6 +1532,63 @@
   }
 };
 
+class OatWriter::WriteMethodInfoVisitor : public OatDexMethodVisitor {
+ public:
+  WriteMethodInfoVisitor(OatWriter* writer,
+                         OutputStream* out,
+                         const size_t file_offset,
+                         size_t relative_offset)
+    : OatDexMethodVisitor(writer, relative_offset),
+      out_(out),
+      file_offset_(file_offset) {}
+
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
+    const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
+
+    if (compiled_method != nullptr) {  // i.e. not an abstract method
+      size_t file_offset = file_offset_;
+      OutputStream* out = out_;
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetMethodInfoOffset();
+      uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
+      ++method_offsets_index_;
+      DCHECK((compiled_method->GetMethodInfo().size() == 0u && map_offset == 0u) ||
+             (compiled_method->GetMethodInfo().size() != 0u && map_offset != 0u))
+          << compiled_method->GetMethodInfo().size() << " " << map_offset << " "
+          << dex_file_->PrettyMethod(it.GetMemberIndex());
+      if (map_offset != 0u) {
+        // Transform map_offset to actual oat data offset.
+        map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
+        DCHECK_NE(map_offset, 0u);
+        DCHECK_LE(map_offset, offset_) << dex_file_->PrettyMethod(it.GetMemberIndex());
+
+        ArrayRef<const uint8_t> map = compiled_method->GetMethodInfo();
+        size_t map_size = map.size() * sizeof(map[0]);
+        if (map_offset == offset_) {
+          // Write the deduplicated method info.
+          if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
+            ReportWriteFailure(it);
+            return false;
+          }
+          offset_ += map_size;
+        }
+      }
+      DCHECK_OFFSET_();
+    }
+
+    return true;
+  }
+
+ private:
+  OutputStream* const out_;
+  size_t const file_offset_;
+
+  void ReportWriteFailure(const ClassDataItemIterator& it) {
+    PLOG(ERROR) << "Failed to write map for "
+        << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
+  }
+};
+
 // Visit all methods from all classes in all dex files with the specified visitor.
 bool OatWriter::VisitDexMethods(DexMethodVisitor* visitor) {
   for (const DexFile* dex_file : *dex_files_) {
@@ -1576,11 +1682,18 @@
   if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
     return offset;
   }
-  InitMapMethodVisitor visitor(this, offset);
-  bool success = VisitDexMethods(&visitor);
-  DCHECK(success);
-  offset = visitor.GetOffset();
-
+  {
+    InitMapMethodVisitor visitor(this, offset);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
+    offset = visitor.GetOffset();
+  }
+  {
+    InitMethodInfoVisitor visitor(this, offset);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
+    offset = visitor.GetOffset();
+  }
   return offset;
 }
 
@@ -1920,6 +2033,7 @@
     DO_STAT(size_relative_call_thunks_);
     DO_STAT(size_misc_thunks_);
     DO_STAT(size_vmap_table_);
+    DO_STAT(size_method_info_);
     DO_STAT(size_oat_dex_file_location_size_);
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
@@ -2035,13 +2149,24 @@
 }
 
 size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  size_t vmap_tables_offset = relative_offset;
-  WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
-  if (UNLIKELY(!VisitDexMethods(&visitor))) {
-    return 0;
+  {
+    size_t vmap_tables_offset = relative_offset;
+    WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
+    if (UNLIKELY(!VisitDexMethods(&visitor))) {
+      return 0;
+    }
+    relative_offset = visitor.GetOffset();
+    size_vmap_table_ = relative_offset - vmap_tables_offset;
   }
-  relative_offset = visitor.GetOffset();
-  size_vmap_table_ = relative_offset - vmap_tables_offset;
+  {
+    size_t method_infos_offset = relative_offset;
+    WriteMethodInfoVisitor visitor(this, out, file_offset, relative_offset);
+    if (UNLIKELY(!VisitDexMethods(&visitor))) {
+      return 0;
+    }
+    relative_offset = visitor.GetOffset();
+    size_method_info_ = relative_offset - method_infos_offset;
+  }
 
   return relative_offset;
 }
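
The offset bookkeeping above leans on unsigned wraparound: the Init*Visitors record `0u - offset` while code offsets are still unknown, and the later `+= code_offset` adjustment lands on `code_offset - offset`, which is exactly what the otherwise surprising `DCHECK_LT(..., code_offset)` verifies. A worked example:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t offset = 128u;    // distance back from the code start
  uint32_t stored = 0u - offset;   // written by the Init*Visitor: 0xFFFFFF80
  const uint32_t code_offset = 0x1000u;
  stored += code_offset;           // wraps around to 0x1000 - 0x80 = 0xF80
  assert(stored == code_offset - offset);
  assert(stored < code_offset);    // the DCHECK_LT above
  return 0;
}
```
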
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5113714..e778f75 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -254,9 +254,11 @@
   class InitOatClassesMethodVisitor;
   class InitCodeMethodVisitor;
   class InitMapMethodVisitor;
+  class InitMethodInfoVisitor;
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
   class WriteMapMethodVisitor;
+  class WriteMethodInfoVisitor;
   class WriteQuickeningInfoMethodVisitor;
 
   // Visit all the methods in all the compiled dex files in their definition order
@@ -425,6 +427,7 @@
   uint32_t size_relative_call_thunks_;
   uint32_t size_misc_thunks_;
   uint32_t size_vmap_table_;
+  uint32_t size_method_info_;
   uint32_t size_oat_dex_file_location_size_;
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 424b850..b7c8075 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -654,8 +654,12 @@
   }
 }
 
-size_t CodeGenerator::ComputeStackMapsSize() {
-  return stack_map_stream_.PrepareForFillIn();
+void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size,
+                                                     size_t* method_info_size) {
+  DCHECK(stack_map_size != nullptr);
+  DCHECK(method_info_size != nullptr);
+  *stack_map_size = stack_map_stream_.PrepareForFillIn();
+  *method_info_size = stack_map_stream_.ComputeMethodInfoSize();
 }
 
 static void CheckCovers(uint32_t dex_pc,
@@ -723,10 +727,13 @@
   }
 }
 
-void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
-  stack_map_stream_.FillIn(region);
+void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region,
+                                   MemoryRegion method_info_region,
+                                   const DexFile::CodeItem& code_item) {
+  stack_map_stream_.FillInCodeInfo(stack_map_region);
+  stack_map_stream_.FillInMethodInfo(method_info_region);
   if (kIsDebugBuild) {
-    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), code_item);
   }
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b912672..ea463ee 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -341,8 +341,10 @@
     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
   }
 
-  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
-  size_t ComputeStackMapsSize();
+  void BuildStackMaps(MemoryRegion stack_map_region,
+                      MemoryRegion method_info_region,
+                      const DexFile::CodeItem& code_item);
+  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
   size_t GetNumberOfJitRoots() const {
     return jit_string_roots_.size() + jit_class_roots_.size();
   }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index f5ada52..d75779c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2000,15 +2000,10 @@
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
-  // Give d14 and d15 as scratch registers to VIXL.
-  // They are removed from the register allocator in `SetupBlockedRegisters()`.
-  // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
-  // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
-  // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
-  // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
-  // also want to investigate giving those 14 other D registers to the allocator.
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+  // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
+  // S0-S31, which alias to D0-D15.
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
 }
 
 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
@@ -2074,13 +2069,6 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
-  // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
-  blocked_fpu_registers_[28] = true;
-  blocked_fpu_registers_[29] = true;
-  blocked_fpu_registers_[30] = true;
-  blocked_fpu_registers_[31] = true;
-
   if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
@@ -6549,13 +6537,16 @@
 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
   // TODO(VIXL32): Double check the performance of this implementation.
   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
-  vixl32::SRegister temp_1 = temps.AcquireS();
-  vixl32::SRegister temp_2 = temps.AcquireS();
+  vixl32::Register temp1 = temps.Acquire();
+  ScratchRegisterScope ensure_scratch(
+      this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
+  vixl32::Register temp2(ensure_scratch.GetRegister());
 
-  __ Vldr(temp_1, MemOperand(sp, mem1));
-  __ Vldr(temp_2, MemOperand(sp, mem2));
-  __ Vstr(temp_1, MemOperand(sp, mem2));
-  __ Vstr(temp_2, MemOperand(sp, mem1));
+  int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+  GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
+  GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
 }
 
 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
@@ -6578,7 +6569,7 @@
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    vixl32::SRegister temp = temps.AcquireS();
+    vixl32::Register temp = temps.Acquire();
     __ Vmov(temp, SRegisterFrom(source));
     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
     __ Vmov(SRegisterFrom(destination), temp);
@@ -6637,12 +6628,12 @@
   }
 }
 
-void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
+  __ Push(vixl32::Register(reg));
 }
 
-void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
+  __ Pop(vixl32::Register(reg));
 }
 
 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index ef01a47..781027a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -35,11 +35,11 @@
 #include "aarch32/macro-assembler-aarch32.h"
 #pragma GCC diagnostic pop
 
-// True if VIXL32 should be used for codegen on ARM.
-#ifdef ART_USE_VIXL_ARM_BACKEND
-static constexpr bool kArmUseVIXL32 = true;
-#else
+// Default to the VIXL-based backend on ARM.
+#ifdef ART_USE_OLD_ARM_BACKEND
 static constexpr bool kArmUseVIXL32 = false;
+#else
+static constexpr bool kArmUseVIXL32 = true;
 #endif
 
 namespace art {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 4814b22..5246dbc 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -5110,12 +5110,34 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMips64PointerSize).SizeValue();
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      method_offset);
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kMips64PointerSize));
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->Out().AsRegister<GpuRegister>(),
+                      method_offset);
+  }
 }
 
 }  // namespace mips64
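
The two branches differ by one level of indirection: a vtable entry lives inside the class object itself, while an IMT lookup first loads the ImTable pointer out of the class and then indexes into that table. A shape-only sketch (FakeClass and the fixed-size tables are illustrative, not the real mirror::Class layout):

```cpp
#include <cstdint>

struct ArtMethodSketch {};
struct ImTableSketch { ArtMethodSketch* entries_[64]; };
struct FakeClass {
  ImTableSketch* imt_ptr_;
  ArtMethodSketch* embedded_vtable_[8];  // really variable-length in ART
};

ArtMethodSketch* ClassTableGet(FakeClass* cls, uint32_t index, bool is_vtable) {
  if (is_vtable) {
    return cls->embedded_vtable_[index];  // one LoadFromOffset
  }
  ImTableSketch* imt = cls->imt_ptr_;     // first LoadFromOffset
  return imt->entries_[index];            // second LoadFromOffset
}
```
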
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 644fcee..08f1adf 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -983,7 +983,7 @@
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
-      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
+      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       __ movq(temp.AsRegister<CpuRegister>(),
@@ -5531,7 +5531,7 @@
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
-      __ movl(out, Immediate(address));  // Zero-extended.
+      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
       break;
     }
     case HLoadClass::LoadKind::kBssEntry: {
@@ -5666,7 +5666,7 @@
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
-      __ movl(out, Immediate(address));  // Zero-extended.
+      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
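
The casts above make a narrowing explicit rather than change behavior: a boot-image address is a 32-bit value that may have its top bit set, `Immediate` holds a signed operand, and a `movl` to a 32-bit register zero-extends on x86-64, so the original unsigned address is recovered. A worked check (two's complement assumed):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t address = 0x90000000u;               // top bit set
  int32_t imm = static_cast<int32_t>(address);  // same 32-bit pattern
  // What a movl leaves in the full 64-bit register (zero-extension):
  uint64_t reg = static_cast<uint32_t>(imm);
  assert(reg == 0x90000000u);
  return 0;
}
```
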
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index cfcb276..0c832a5 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -17,23 +17,23 @@
 #include "dex_cache_array_fixups_arm.h"
 
 #include "base/arena_containers.h"
-#ifdef ART_USE_VIXL_ARM_BACKEND
-#include "code_generator_arm_vixl.h"
-#include "intrinsics_arm_vixl.h"
-#else
+#ifdef ART_USE_OLD_ARM_BACKEND
 #include "code_generator_arm.h"
 #include "intrinsics_arm.h"
+#else
+#include "code_generator_arm_vixl.h"
+#include "intrinsics_arm_vixl.h"
 #endif
 #include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 namespace arm {
-#ifdef ART_USE_VIXL_ARM_BACKEND
-typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
-typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType;
-#else
+#ifdef ART_USE_OLD_ARM_BACKEND
 typedef CodeGeneratorARM CodeGeneratorARMType;
 typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType;
+#else
+typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
+typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType;
 #endif
 
 /**
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 583008b..8a813bd 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -892,10 +892,6 @@
     return false;
   }
 
-  if (graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips64.
-    return false;
-  }
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 9a6dd98..1e17c6e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3018,13 +3018,14 @@
       mirror::Object* boxed = info.cache->Get(value + (-info.low));
       DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
-      __ movl(out, Immediate(address));
+      __ movl(out, Immediate(static_cast<int32_t>(address)));
     } else {
       // Allocate and initialize a new j.l.Integer.
       // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
       // JIT object table.
+      CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
       uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
-      __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+      __ movl(argument, Immediate(static_cast<int32_t>(address)));
       codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
       CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
       __ movl(Address(out, info.value_offset), Immediate(value));
@@ -3039,13 +3040,20 @@
     // If the value is within the bounds, load the j.l.Integer directly from the array.
     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
-    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
+      __ movl(out, Address(out, TIMES_4, data_offset + address));
+    } else {
+      CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
+      __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
+      __ movl(out, Address(temp, out, TIMES_4, 0));
+    }
     __ MaybeUnpoisonHeapReference(out);
     __ jmp(&done);
     __ Bind(&allocate);
     // Otherwise allocate and initialize a new j.l.Integer.
+    CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
     address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+    __ movl(argument, Immediate(static_cast<int32_t>(address)));
     codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
     __ movl(Address(out, info.value_offset), in);
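
The new guard exists because an x86-64 addressing-mode displacement is a signed 32-bit field: `data_offset + address` can only be folded into the `Address(out, TIMES_4, disp)` form while it fits in int32; otherwise the base must be materialized in a register first. A sketch of the predicate, widened to 64 bits here to sidestep unsigned wraparound (which the in-tree check tolerates because data_offset is small):

```cpp
#include <cstdint>
#include <limits>

bool FitsInDisplacement(uint32_t data_offset, uint32_t address) {
  return static_cast<uint64_t>(data_offset) + address <=
         static_cast<uint64_t>(std::numeric_limits<int32_t>::max());
}
```
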
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 0e02311..490e50c 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -24,17 +24,17 @@
 #include "optimizing/code_generator.h"
 #include "optimizing/optimizing_unit_test.h"
 #include "utils/assembler.h"
-#ifdef ART_USE_VIXL_ARM_BACKEND
-#include "utils/arm/assembler_arm_vixl.h"
-#else
+#ifdef ART_USE_OLD_ARM_BACKEND
 #include "utils/arm/assembler_thumb2.h"
+#else
+#include "utils/arm/assembler_arm_vixl.h"
 #endif
 #include "utils/mips/assembler_mips.h"
 #include "utils/mips64/assembler_mips64.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
 
-#ifdef ART_USE_VIXL_ARM_BACKEND
+#ifndef ART_USE_OLD_ARM_BACKEND
 namespace vixl32 = vixl::aarch32;
 
 using vixl32::r0;
@@ -196,7 +196,15 @@
       expected_cfi_kThumb2_adjust,
       expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust));
   SetUpFrame(kThumb2);
-#ifdef ART_USE_VIXL_ARM_BACKEND
+#ifdef ART_USE_OLD_ARM_BACKEND
+#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
+  Label target;
+  __ CompareAndBranchIfZero(arm::R0, &target);
+  // Push the target out of range of CBZ.
+  for (size_t i = 0; i != 65; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+#else
 #define __ down_cast<arm::ArmVIXLAssembler*>(GetCodeGenerator() \
     ->GetAssembler())->GetVIXLAssembler()->
   vixl32::Label target;
@@ -205,14 +213,6 @@
   for (size_t i = 0; i != 65; ++i) {
     __ Ldr(r0, vixl32::MemOperand(r0));
   }
-#else
-#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
-  Label target;
-  __ CompareAndBranchIfZero(arm::R0, &target);
-  // Push the target out of range of CBZ.
-  for (size_t i = 0; i != 65; ++i) {
-    __ ldr(arm::R0, arm::Address(arm::R0));
-  }
 #endif
   __ Bind(&target);
 #undef __
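
The magic number 65 is range arithmetic: Thumb CBZ reaches at most 126 bytes past the PC (instruction address + 4), each 16-bit `ldr r0, [r0]` is 2 bytes, and 65 of them place the bound target 132 bytes past the CBZ, just out of reach, forcing the compare-plus-long-branch form the expected arrays encode:

```cpp
// 2 bytes for the CBZ itself + 65 * 2 bytes of loads = target at +132,
// while the farthest CBZ can reach is +4 (PC bias) + 126 = +130.
static_assert(2 + 65 * 2 > 4 + 126, "target must be out of CBZ range");
```
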
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 82670c3..d84fe6c 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -223,15 +223,15 @@
 // 0x00000040: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
-#ifdef ART_USE_VIXL_ARM_BACKEND
-    // VIXL emits an extra 2 bytes here for a 32-bit beq as there is no
-    // optimistic 16-bit emit and subsequent fixup for out of reach targets
-    // as with the current assembler.
-    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, 0x00, 0xF0,
-    0x41, 0x80, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
-#else
+#ifdef ART_USE_OLD_ARM_BACKEND
     0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28,
     0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+#else
+    // VIXL emits an extra 2 bytes here for a 32-bit beq as there is no
+    // optimistic 16-bit emit and subsequent fixup for out-of-reach targets
+    // as with the old assembler.
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, 0x00, 0xF0,
+    0x41, 0x80, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
 #endif
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
@@ -247,10 +247,10 @@
 };
 static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
     0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
-#ifdef ART_USE_VIXL_ARM_BACKEND
-    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
-#else
+#ifdef ART_USE_OLD_ARM_BACKEND
     0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A,
+#else
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
 #endif
     0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
     0x0E, 0x40,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d6153b0..23ccd9e 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -856,8 +856,15 @@
                                          const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
-  stack_map.resize(codegen->ComputeStackMapsSize());
-  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
+  ArenaVector<uint8_t> method_info(arena->Adapter(kArenaAllocStackMaps));
+  size_t stack_map_size = 0;
+  size_t method_info_size = 0;
+  codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
+  stack_map.resize(stack_map_size);
+  method_info.resize(method_info_size);
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()),
+                          MemoryRegion(method_info.data(), method_info.size()),
+                          *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -869,7 +876,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(),
+      ArrayRef<const uint8_t>(method_info),
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
@@ -1200,7 +1207,9 @@
     }
   }
 
-  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  size_t stack_map_size = 0;
+  size_t method_info_size = 0;
+  codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
   size_t number_of_roots = codegen->GetNumberOfJitRoots();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
@@ -1216,20 +1225,30 @@
     return false;
   }
   uint8_t* stack_map_data = nullptr;
+  uint8_t* method_info_data = nullptr;
   uint8_t* roots_data = nullptr;
-  uint32_t data_size = code_cache->ReserveData(
-      self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
+  uint32_t data_size = code_cache->ReserveData(self,
+                                               stack_map_size,
+                                               method_info_size,
+                                               number_of_roots,
+                                               method,
+                                               &stack_map_data,
+                                               &method_info_data,
+                                               &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
-  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
+                          MemoryRegion(method_info_data, method_info_size),
+                          *code_item);
   codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data);
 
   const void* code = code_cache->CommitCode(
       self,
       method,
       stack_map_data,
+      method_info_data,
       roots_data,
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 4d12ad6..b7840d7 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -152,6 +152,9 @@
   encoding.location_catalog.num_entries = location_catalog_entries_.size();
   encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize();
   encoding.inline_info.num_entries = inline_infos_.size();
+  // Must be done before calling ComputeInlineInfoEncoding, which requires
+  // dex_method_index_idx to be filled in.
+  PrepareMethodIndices();
   ComputeInlineInfoEncoding(&encoding.inline_info.encoding,
                             encoding.dex_register_map.num_bytes);
   CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
@@ -245,7 +248,7 @@
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       if (inline_entry.method == nullptr) {
-        method_index_max = std::max(method_index_max, inline_entry.method_index);
+        method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx);
         extra_data_max = std::max(extra_data_max, 1u);
       } else {
         method_index_max = std::max(
@@ -288,7 +291,25 @@
   return entry.offset;
 }
 
-void StackMapStream::FillIn(MemoryRegion region) {
+void StackMapStream::FillInMethodInfo(MemoryRegion region) {
+  {
+    MethodInfo info(region.begin(), method_indices_.size());
+    for (size_t i = 0; i < method_indices_.size(); ++i) {
+      info.SetMethodIndex(i, method_indices_[i]);
+    }
+  }
+  if (kIsDebugBuild) {
+    // Check the data matches.
+    MethodInfo info(region.begin());
+    const size_t count = info.NumMethodIndices();
+    DCHECK_EQ(count, method_indices_.size());
+    for (size_t i = 0; i < count; ++i) {
+      DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]);
+    }
+  }
+}
+
+void StackMapStream::FillInCodeInfo(MemoryRegion region) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
   DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
 
@@ -345,7 +366,7 @@
       InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx));
       invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset);
       invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type);
-      invoke_info.SetMethodIndex(encoding.invoke_info.encoding, entry.dex_method_index);
+      invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx);
       ++invoke_info_idx;
     }
 
@@ -364,7 +385,7 @@
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
         InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
         if (inline_entry.method != nullptr) {
-          inline_info.SetMethodIndexAtDepth(
+          inline_info.SetMethodIndexIdxAtDepth(
               encoding.inline_info.encoding,
               depth,
               High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
@@ -373,9 +394,9 @@
               depth,
               Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
         } else {
-          inline_info.SetMethodIndexAtDepth(encoding.inline_info.encoding,
-                                            depth,
-                                            inline_entry.method_index);
+          inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding,
+                                               depth,
+                                               inline_entry.dex_method_index_idx);
           inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1);
         }
         inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc);
@@ -533,6 +554,29 @@
   return dedupe.size();
 }
 
+void StackMapStream::PrepareMethodIndices() {
+  CHECK(method_indices_.empty());
+  method_indices_.resize(stack_maps_.size() + inline_infos_.size());
+  ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
+  for (StackMapEntry& stack_map : stack_maps_) {
+    const size_t index = dedupe.size();
+    const uint32_t method_index = stack_map.dex_method_index;
+    if (method_index != DexFile::kDexNoIndex) {
+      stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+      method_indices_[index] = method_index;
+    }
+  }
+  for (InlineInfoEntry& inline_info : inline_infos_) {
+    const size_t index = dedupe.size();
+    const uint32_t method_index = inline_info.method_index;
+    CHECK_NE(method_index, DexFile::kDexNoIndex);
+    inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+    method_indices_[index] = method_index;
+  }
+  method_indices_.resize(dedupe.size());
+}
+
 size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
   // Preallocate memory since we do not want it to move (the dedup map will point into it).
   const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -590,7 +634,8 @@
       DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_),
                 entry.native_pc_code_offset.Uint32Value(instruction_set_));
       DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type);
-      DCHECK_EQ(invoke_info.GetMethodIndex(encoding.invoke_info.encoding), entry.dex_method_index);
+      DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding),
+                entry.dex_method_index_idx);
       invoke_info_index++;
     }
     CheckDexRegisterMap(code_info,
@@ -615,8 +660,10 @@
           DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d),
                     inline_entry.method);
         } else {
-          DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info.encoding, d),
-                    inline_entry.method_index);
+          const size_t method_index_idx =
+              inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d);
+          DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx);
+          DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
         }
 
         CheckDexRegisterMap(code_info,
@@ -633,4 +680,9 @@
   }
 }
 
+size_t StackMapStream::ComputeMethodInfoSize() const {
+  DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__;
+  return MethodInfo::ComputeSize(method_indices_.size());
+}
+
 }  // namespace art
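For reference, a minimal standalone sketch of the dedupe pattern that PrepareMethodIndices relies on; it uses only the standard library and is not ART code. The key property is that unordered_map::emplace leaves the map untouched on a duplicate key and returns an iterator to the existing entry, so each distinct method index gets exactly one slot in the table.

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Returns the deduplicated table; slots_out receives, per input entry,
    // its index into that table (the dex_method_index_idx equivalent).
    std::vector<uint32_t> DedupeMethodIndices(const std::vector<uint32_t>& inputs,
                                              std::vector<size_t>* slots_out) {
      std::unordered_map<uint32_t, size_t> dedupe;
      std::vector<uint32_t> table(inputs.size());
      for (uint32_t method_index : inputs) {
        const size_t next = dedupe.size();
        // On a duplicate, first->second is the slot assigned on first insertion.
        const size_t slot = dedupe.emplace(method_index, next).first->second;
        slots_out->push_back(slot);
        if (slot == next) {
          table[next] = method_index;
        }
      }
      table.resize(dedupe.size());  // Trim the unused tail, as PrepareMethodIndices does.
      return table;
    }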
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 4225a87..e6471e1 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -22,6 +22,7 @@
 #include "base/hash_map.h"
 #include "base/value_object.h"
 #include "memory_region.h"
+#include "method_info.h"
 #include "nodes.h"
 #include "stack_map.h"
 
@@ -70,6 +71,7 @@
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+        method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
@@ -120,6 +122,7 @@
     size_t dex_register_map_index;
     InvokeType invoke_type;
     uint32_t dex_method_index;
+    uint32_t dex_method_index_idx;  // Index into dex method index table.
   };
 
   struct InlineInfoEntry {
@@ -128,6 +131,7 @@
     uint32_t method_index;
     DexRegisterMapEntry dex_register_entry;
     size_t dex_register_map_index;
+    uint32_t dex_method_index_idx;  // Index into the dex method index table.
   };
 
   void BeginStackMapEntry(uint32_t dex_pc,
@@ -164,7 +168,10 @@
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
   size_t PrepareForFillIn();
-  void FillIn(MemoryRegion region);
+  void FillInCodeInfo(MemoryRegion region);
+  void FillInMethodInfo(MemoryRegion region);
+
+  size_t ComputeMethodInfoSize() const;
 
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
@@ -180,6 +187,9 @@
   // Returns the number of unique register masks.
   size_t PrepareRegisterMasks();
 
+  // Prepare and deduplicate method indices.
+  void PrepareMethodIndices();
+
   // Deduplicate entry if possible and return the corresponding index into dex_register_entries_
   // array. If entry is not a duplicate, a new entry is added to dex_register_entries_.
   size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry);
@@ -232,6 +242,7 @@
   ArenaVector<InlineInfoEntry> inline_infos_;
   ArenaVector<uint8_t> stack_masks_;
   ArenaVector<uint32_t> register_masks_;
+  ArenaVector<uint32_t> method_indices_;
   ArenaVector<DexRegisterMapEntry> dex_register_entries_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 330f7f2..a842c6e 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -60,7 +60,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -173,7 +173,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -433,7 +433,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -519,7 +519,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -611,7 +611,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -672,7 +672,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo ci(region);
   CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -721,7 +721,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -823,7 +823,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo ci(region);
   CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -950,7 +950,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -979,11 +979,16 @@
   stream.AddInvoke(kDirect, 65535);
   stream.EndStackMapEntry();
 
-  const size_t size = stream.PrepareForFillIn();
-  MemoryRegion region(arena.Alloc(size, kArenaAllocMisc), size);
-  stream.FillIn(region);
+  const size_t code_info_size = stream.PrepareForFillIn();
+  MemoryRegion code_info_region(arena.Alloc(code_info_size, kArenaAllocMisc), code_info_size);
+  stream.FillInCodeInfo(code_info_region);
 
-  CodeInfo code_info(region);
+  const size_t method_info_size = stream.ComputeMethodInfoSize();
+  MemoryRegion method_info_region(arena.Alloc(method_info_size, kArenaAllocMisc), method_info_size);
+  stream.FillInMethodInfo(method_info_region);
+
+  CodeInfo code_info(code_info_region);
+  MethodInfo method_info(method_info_region.begin());
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
   ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding));
 
@@ -996,13 +1001,13 @@
   EXPECT_TRUE(invoke2.IsValid());
   EXPECT_TRUE(invoke3.IsValid());
   EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper);
-  EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding), 1u);
+  EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u);
   EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u);
   EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic);
-  EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding), 3u);
+  EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u);
   EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u);
   EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect);
-  EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding), 65535u);
+  EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u);
   EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u);
 }
 
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index e5eef37..6afc3dd 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -230,6 +230,7 @@
   if (!CanHoldStoreOffsetThumb(type, offset)) {
     CHECK_NE(base.GetCode(), kIpCode);
     if ((reg.GetCode() != kIpCode) &&
+        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
         ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
       tmp_reg = temps.Acquire();
     } else {
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index e767023..878d0f2 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -596,7 +596,7 @@
       kByteKindStackMapInlineInfoIndex,
       kByteKindStackMapRegisterMaskIndex,
       kByteKindStackMapStackMaskIndex,
-      kByteKindInlineInfoMethodIndex,
+      kByteKindInlineInfoMethodIndexIdx,
       kByteKindInlineInfoDexPc,
       kByteKindInlineInfoExtraData,
       kByteKindInlineInfoDexRegisterMap,
@@ -605,7 +605,7 @@
       // Special ranges for std::accumulate convenience.
       kByteKindStackMapFirst = kByteKindStackMapNativePc,
       kByteKindStackMapLast = kByteKindStackMapStackMaskIndex,
-      kByteKindInlineInfoFirst = kByteKindInlineInfoMethodIndex,
+      kByteKindInlineInfoFirst = kByteKindInlineInfoMethodIndexIdx,
       kByteKindInlineInfoLast = kByteKindInlineInfoIsLast,
     };
     int64_t bits[kByteKindCount] = {};
@@ -685,8 +685,8 @@
         {
           ScopedIndentation indent1(&os);
           Dump(os,
-               "InlineInfoMethodIndex         ",
-               bits[kByteKindInlineInfoMethodIndex],
+               "InlineInfoMethodIndexIdx      ",
+               bits[kByteKindInlineInfoMethodIndexIdx],
                inline_info_bits,
                "inline info");
           Dump(os,
@@ -1363,7 +1363,8 @@
         CodeInfo code_info(raw_code_info);
         DCHECK(code_item != nullptr);
         ScopedIndentation indent1(vios);
-        DumpCodeInfo(vios, code_info, oat_method, *code_item);
+        MethodInfo method_info = oat_method.GetOatQuickMethodHeader()->GetOptimizedMethodInfo();
+        DumpCodeInfo(vios, code_info, oat_method, *code_item, method_info);
       }
     } else if (IsMethodGeneratedByDexToDexCompiler(oat_method, code_item)) {
       // We don't encode the size in the table, so just emit that we have quickened
@@ -1379,12 +1380,14 @@
   void DumpCodeInfo(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
                     const OatFile::OatMethod& oat_method,
-                    const DexFile::CodeItem& code_item) {
+                    const DexFile::CodeItem& code_item,
+                    const MethodInfo& method_info) {
     code_info.Dump(vios,
                    oat_method.GetCodeOffset(),
                    code_item.registers_size_,
                    options_.dump_code_info_stack_maps_,
-                   instruction_set_);
+                   instruction_set_,
+                   method_info);
   }
 
   void DumpVregLocations(std::ostream& os, const OatFile::OatMethod& oat_method,
@@ -1592,6 +1595,7 @@
     } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
       // The optimizing compiler outputs its CodeInfo data in the vmap table.
       StackMapsHelper helper(oat_method.GetVmapTable(), instruction_set_);
+      MethodInfo method_info(oat_method.GetOatQuickMethodHeader()->GetOptimizedMethodInfo());
       {
         CodeInfoEncoding encoding(helper.GetEncoding());
         StackMapEncoding stack_map_encoding(encoding.stack_map.encoding);
@@ -1652,8 +1656,9 @@
           const size_t num_inline_infos = encoding.inline_info.num_entries;
           if (num_inline_infos > 0u) {
             stats_.AddBits(
-                Stats::kByteKindInlineInfoMethodIndex,
-                encoding.inline_info.encoding.GetMethodIndexEncoding().BitSize() * num_inline_infos);
+                Stats::kByteKindInlineInfoMethodIndexIdx,
+                encoding.inline_info.encoding.GetMethodIndexIdxEncoding().BitSize() *
+                    num_inline_infos);
             stats_.AddBits(
                 Stats::kByteKindInlineInfoDexPc,
                 encoding.inline_info.encoding.GetDexPcEncoding().BitSize() * num_inline_infos);
@@ -1679,6 +1684,7 @@
           stack_map.Dump(vios,
                          helper.GetCodeInfo(),
                          helper.GetEncoding(),
+                         method_info,
                          oat_method.GetCodeOffset(),
                          code_item->registers_size_,
                          instruction_set_);
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 4041f5e..f536c72 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -27,6 +27,22 @@
 
 namespace art {
 
+// Like sizeof, but count how many bits a type takes. Pass type explicitly.
+template <typename T>
+constexpr size_t BitSizeOf() {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  static_assert(sizeof(T) == sizeof(unsigned_type), "Unexpected type size mismatch!");
+  static_assert(std::numeric_limits<unsigned_type>::radix == 2, "Unexpected radix!");
+  return std::numeric_limits<unsigned_type>::digits;
+}
+
+// Like sizeof, but count how many bits a type takes. Infers type from parameter.
+template <typename T>
+constexpr size_t BitSizeOf(T /*x*/) {
+  return BitSizeOf<T>();
+}
+
 template<typename T>
 constexpr int CLZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
@@ -37,6 +53,14 @@
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_clz(x) : __builtin_clzll(x);
 }
 
+// Like CLZ, but returns the bit width on zero input and also supports signed integers.
+template<typename T>
+constexpr int JAVASTYLE_CLZ(T x) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (x == 0) ? BitSizeOf<T>() : CLZ(static_cast<unsigned_type>(x));
+}
+
 template<typename T>
 constexpr int CTZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
@@ -48,12 +72,32 @@
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_ctz(x) : __builtin_ctzll(x);
 }
 
+// Like CTZ, but returns the bit width on zero input and also supports signed integers.
+template<typename T>
+constexpr int JAVASTYLE_CTZ(T x) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (x == 0) ? BitSizeOf<T>() : CTZ(static_cast<unsigned_type>(x));
+}
+
 // Return the number of 1-bits in `x`.
 template<typename T>
 constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_popcount(x) : __builtin_popcountll(x);
 }
 
+// Swap bytes.
+template<typename T>
+constexpr T BSWAP(T x) {
+  if (sizeof(T) == sizeof(uint16_t)) {
+    return __builtin_bswap16(x);
+  } else if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_bswap32(x);
+  } else {
+    return __builtin_bswap64(x);
+  }
+}
+
 // Find the bit position of the most significant bit (0-based), or -1 if there were no bits set.
 template <typename T>
 constexpr ssize_t MostSignificantBit(T value) {
@@ -169,22 +213,6 @@
 #define DCHECK_ALIGNED_PARAM(value, alignment) \
   DCHECK(::art::IsAlignedParam(value, alignment)) << reinterpret_cast<const void*>(value)
 
-// Like sizeof, but count how many bits a type takes. Pass type explicitly.
-template <typename T>
-constexpr size_t BitSizeOf() {
-  static_assert(std::is_integral<T>::value, "T must be integral");
-  using unsigned_type = typename std::make_unsigned<T>::type;
-  static_assert(sizeof(T) == sizeof(unsigned_type), "Unexpected type size mismatch!");
-  static_assert(std::numeric_limits<unsigned_type>::radix == 2, "Unexpected radix!");
-  return std::numeric_limits<unsigned_type>::digits;
-}
-
-// Like sizeof, but count how many bits a type takes. Infers type from parameter.
-template <typename T>
-constexpr size_t BitSizeOf(T /*x*/) {
-  return BitSizeOf<T>();
-}
-
 inline uint16_t Low16Bits(uint32_t value) {
   return static_cast<uint16_t>(value);
 }
@@ -363,6 +391,59 @@
       HighToLowBitIterator<T>(bits), HighToLowBitIterator<T>());
 }
 
+// Returns value with bit set in lowest one-bit position or 0 if 0.  (java.lang.X.lowestOneBit).
+template <typename kind>
+inline static kind LowestOneBitValue(kind opnd) {
+  // Hacker's Delight, Section 2-1
+  return opnd & -opnd;
+}
+
+// Returns value with bit set in highest one-bit position or 0 if 0.  (java.lang.X.highestOneBit).
+template <typename T>
+inline static T HighestOneBitValue(T opnd) {
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  T res;
+  if (opnd == 0) {
+    res = 0;
+  } else {
+    int bit_position = BitSizeOf<T>() - (CLZ(static_cast<unsigned_type>(opnd)) + 1);
+    res = static_cast<T>(UINT64_C(1) << bit_position);
+  }
+  return res;
+}
+
+// Rotate bits.
+template <typename T, bool left>
+inline static T Rot(T opnd, int distance) {
+  int mask = BitSizeOf<T>() - 1;
+  int unsigned_right_shift = left ? (-distance & mask) : (distance & mask);
+  int signed_left_shift = left ? (distance & mask) : (-distance & mask);
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (static_cast<unsigned_type>(opnd) >> unsigned_right_shift) | (opnd << signed_left_shift);
+}
+
+// TUNING: use rbit for arm/arm64
+inline static uint32_t ReverseBits32(uint32_t opnd) {
+  // Hacker's Delight 7-1
+  opnd = ((opnd >>  1) & 0x55555555) | ((opnd & 0x55555555) <<  1);
+  opnd = ((opnd >>  2) & 0x33333333) | ((opnd & 0x33333333) <<  2);
+  opnd = ((opnd >>  4) & 0x0F0F0F0F) | ((opnd & 0x0F0F0F0F) <<  4);
+  opnd = ((opnd >>  8) & 0x00FF00FF) | ((opnd & 0x00FF00FF) <<  8);
+  opnd = ((opnd >> 16)) | ((opnd) << 16);
+  return opnd;
+}
+
+// TUNING: use rbit for arm/arm64
+inline static uint64_t ReverseBits64(uint64_t opnd) {
+  // Hacker's Delight 7-1
+  opnd = (opnd & 0x5555555555555555L) << 1 | ((opnd >> 1) & 0x5555555555555555L);
+  opnd = (opnd & 0x3333333333333333L) << 2 | ((opnd >> 2) & 0x3333333333333333L);
+  opnd = (opnd & 0x0f0f0f0f0f0f0f0fL) << 4 | ((opnd >> 4) & 0x0f0f0f0f0f0f0f0fL);
+  opnd = (opnd & 0x00ff00ff00ff00ffL) << 8 | ((opnd >> 8) & 0x00ff00ff00ff00ffL);
+  opnd = (opnd << 48) | ((opnd & 0xffff0000L) << 16) | ((opnd >> 16) & 0xffff0000L) | (opnd >> 48);
+  return opnd;
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_BIT_UTILS_H_
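A few spot checks make the zero-input and rotation conventions of the new helpers concrete; they match their java.lang.Integer/Long counterparts. This is a hedged sketch: the include path is an assumption, and the expected values follow directly from the definitions above.

    #include <cassert>
    #include <cstdint>
    #include "base/bit_utils.h"  // Assumed include path for the helpers above.

    int main() {
      // JAVASTYLE_CLZ/CTZ return the bit width on zero input, matching
      // java.lang.Integer.numberOfLeadingZeros(0) == 32.
      assert(art::JAVASTYLE_CLZ<int32_t>(0) == 32);
      assert(art::JAVASTYLE_CTZ<int64_t>(0) == 64);
      // Rotation distances are masked to the bit width, so oversized
      // distances behave like the Java rotateLeft/rotateRight.
      assert((art::Rot<int32_t, true>(1, 33) == 2));  // rotateLeft(1, 33)
      assert(art::HighestOneBitValue<int32_t>(0xF0) == 0x80);
      assert(art::LowestOneBitValue<int32_t>(0xF0) == 0x10);
      assert(art::ReverseBits32(0x00000001u) == 0x80000000u);
      return 0;
    }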
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 3bc49b8..ba8cec3 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -43,6 +43,7 @@
 namespace art {
 
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
+                                    const MethodInfo& method_info,
                                     const InlineInfo& inline_info,
                                     const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
@@ -56,7 +57,7 @@
     return inline_info.GetArtMethodAtDepth(encoding, inlining_depth);
   }
 
-  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
+  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, method_info, inlining_depth);
   if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) {
     // "charAt" special case. It is the only non-leaf method we inline across dex files.
     ArtMethod* inlined_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
@@ -68,6 +69,7 @@
   ArtMethod* caller = outer_method;
   if (inlining_depth != 0) {
     caller = GetResolvedMethod(outer_method,
+                               method_info,
                                inline_info,
                                encoding,
                                inlining_depth - 1);
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 9c453ec..b5130d7 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -201,12 +201,14 @@
       DCHECK(current_code->IsOptimized());
       uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
       CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+      MethodInfo method_info = current_code->GetOptimizedMethodInfo();
       CodeInfoEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
       if (stack_map.HasInlineInfo(encoding.stack_map.encoding)) {
         InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
         caller = GetResolvedMethod(outer_method,
+                                   method_info,
                                    inline_info,
                                    encoding.inline_info.encoding,
                                    inline_info.GetDepth(encoding.inline_info.encoding) - 1);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 3fd20a6..25073a8 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -372,10 +372,11 @@
     uintptr_t outer_pc_offset = current_code->NativeQuickPcOffset(outer_pc);
     CodeInfo code_info = current_code->GetOptimizedCodeInfo();
     CodeInfoEncoding encoding = code_info.ExtractEncoding();
+    MethodInfo method_info = current_code->GetOptimizedMethodInfo();
     InvokeInfo invoke(code_info.GetInvokeInfoForNativePcOffset(outer_pc_offset, encoding));
     if (invoke.IsValid()) {
       *invoke_type = static_cast<InvokeType>(invoke.GetInvokeType(encoding.invoke_info.encoding));
-      *dex_method_index = invoke.GetMethodIndex(encoding.invoke_info.encoding);
+      *dex_method_index = invoke.GetMethodIndex(encoding.invoke_info.encoding, method_info);
       return true;
     }
     return false;
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index ff0c20e..0ae7307 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -20,19 +20,29 @@
 namespace art {
 namespace interpreter {
 
-#define BINARY_SIMPLE_INTRINSIC(name, op, get, set, offset)  \
-static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,    \
-                               const Instruction* inst,      \
-                               uint16_t inst_data,           \
-                               JValue* result_register)      \
-    REQUIRES_SHARED(Locks::mutator_lock_) {                  \
-  uint32_t arg[Instruction::kMaxVarArgRegs] = {};            \
-  inst->GetVarArgs(arg, inst_data);                          \
-  result_register->set(op(shadow_frame->get(arg[0]), shadow_frame->get(arg[offset]))); \
-  return true;                                               \
+
+#define BINARY_INTRINSIC(name, op, get1, get2, set)                 \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,           \
+                               const Instruction* inst,             \
+                               uint16_t inst_data,                  \
+                               JValue* result_register)             \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                         \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                   \
+  inst->GetVarArgs(arg, inst_data);                                 \
+  result_register->set(op(shadow_frame->get1, shadow_frame->get2)); \
+  return true;                                                      \
 }
 
-#define UNARY_SIMPLE_INTRINSIC(name, op, get, set)           \
+#define BINARY_II_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVReg(arg[0]), GetVReg(arg[1]), set)
+
+#define BINARY_JJ_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVRegLong(arg[0]), GetVRegLong(arg[2]), set)
+
+#define BINARY_JI_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVRegLong(arg[0]), GetVReg(arg[2]), set)
+
+#define UNARY_INTRINSIC(name, op, get, set)                  \
 static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,    \
                                const Instruction* inst,      \
                                uint16_t inst_data,           \
@@ -44,40 +54,126 @@
   return true;                                               \
 }
 
+
+// java.lang.Integer.reverse(I)I
+UNARY_INTRINSIC(MterpIntegerReverse, ReverseBits32, GetVReg, SetI);
+
+// java.lang.Integer.reverseBytes(I)I
+UNARY_INTRINSIC(MterpIntegerReverseBytes, BSWAP, GetVReg, SetI);
+
+// java.lang.Integer.bitCount(I)I
+UNARY_INTRINSIC(MterpIntegerBitCount, POPCOUNT, GetVReg, SetI);
+
+// java.lang.Integer.compare(II)I
+BINARY_II_INTRINSIC(MterpIntegerCompare, Compare, SetI);
+
+// java.lang.Integer.highestOneBit(I)I
+UNARY_INTRINSIC(MterpIntegerHighestOneBit, HighestOneBitValue, GetVReg, SetI);
+
+// java.lang.Integer.lowestOneBit(I)I
+UNARY_INTRINSIC(MterpIntegerLowestOneBit, LowestOneBitValue, GetVReg, SetI);
+
+// java.lang.Integer.numberOfLeadingZeros(I)I
+UNARY_INTRINSIC(MterpIntegerNumberOfLeadingZeros, JAVASTYLE_CLZ, GetVReg, SetI);
+
+// java.lang.Integer.numberOfTrailingZeros(I)I
+UNARY_INTRINSIC(MterpIntegerNumberOfTrailingZeros, JAVASTYLE_CTZ, GetVReg, SetI);
+
+// java.lang.Integer.rotateRight(II)I
+BINARY_II_INTRINSIC(MterpIntegerRotateRight, (Rot<int32_t, false>), SetI);
+
+// java.lang.Integer.rotateLeft(II)I
+BINARY_II_INTRINSIC(MterpIntegerRotateLeft, (Rot<int32_t, true>), SetI);
+
+// java.lang.Integer.signum(I)I
+UNARY_INTRINSIC(MterpIntegerSignum, Signum, GetVReg, SetI);
+
+// java.lang.Long.reverse(J)J
+UNARY_INTRINSIC(MterpLongReverse, ReverseBits64, GetVRegLong, SetJ);
+
+// java.lang.Long.reverseBytes(J)J
+UNARY_INTRINSIC(MterpLongReverseBytes, BSWAP, GetVRegLong, SetJ);
+
+// java.lang.Long.bitCount(J)I
+UNARY_INTRINSIC(MterpLongBitCount, POPCOUNT, GetVRegLong, SetI);
+
+// java.lang.Long.compare(JJ)I
+BINARY_JJ_INTRINSIC(MterpLongCompare, Compare, SetI);
+
+// java.lang.Long.highestOneBit(J)J
+UNARY_INTRINSIC(MterpLongHighestOneBit, HighestOneBitValue, GetVRegLong, SetJ);
+
+// java.lang.Long.lowestOneBit(J)J
+UNARY_INTRINSIC(MterpLongLowestOneBit, LowestOneBitValue, GetVRegLong, SetJ);
+
+// java.lang.Long.numberOfLeadingZeros(J)I
+UNARY_INTRINSIC(MterpLongNumberOfLeadingZeros, JAVASTYLE_CLZ, GetVRegLong, SetJ);
+
+// java.lang.Long.numberOfTrailingZeros(J)I
+UNARY_INTRINSIC(MterpLongNumberOfTrailingZeros, JAVASTYLE_CTZ, GetVRegLong, SetJ);
+
+// java.lang.Long.rotateRight(JI)J
+BINARY_JJ_INTRINSIC(MterpLongRotateRight, (Rot<int64_t, false>), SetJ);
+
+// java.lang.Long.rotateLeft(JI)J
+BINARY_JJ_INTRINSIC(MterpLongRotateLeft, (Rot<int64_t, true>), SetJ);
+
+// java.lang.Long.signum(J)I
+UNARY_INTRINSIC(MterpLongSignum, Signum, GetVRegLong, SetI);
+
+// java.lang.Short.reverseBytes(S)S
+UNARY_INTRINSIC(MterpShortReverseBytes, BSWAP, GetVRegShort, SetS);
+
 // java.lang.Math.min(II)I
-BINARY_SIMPLE_INTRINSIC(MterpMathMinIntInt, std::min, GetVReg, SetI, 1);
+BINARY_II_INTRINSIC(MterpMathMinIntInt, std::min, SetI);
+
 // java.lang.Math.min(JJ)J
-BINARY_SIMPLE_INTRINSIC(MterpMathMinLongLong, std::min, GetVRegLong, SetJ, 2);
+BINARY_JJ_INTRINSIC(MterpMathMinLongLong, std::min, SetJ);
+
 // java.lang.Math.max(II)I
-BINARY_SIMPLE_INTRINSIC(MterpMathMaxIntInt, std::max, GetVReg, SetI, 1);
+BINARY_II_INTRINSIC(MterpMathMaxIntInt, std::max, SetI);
+
 // java.lang.Math.max(JJ)J
-BINARY_SIMPLE_INTRINSIC(MterpMathMaxLongLong, std::max, GetVRegLong, SetJ, 2);
+BINARY_JJ_INTRINSIC(MterpMathMaxLongLong, std::max, SetJ);
+
 // java.lang.Math.abs(I)I
-UNARY_SIMPLE_INTRINSIC(MterpMathAbsInt, std::abs, GetVReg, SetI);
+UNARY_INTRINSIC(MterpMathAbsInt, std::abs, GetVReg, SetI);
+
 // java.lang.Math.abs(J)J
-UNARY_SIMPLE_INTRINSIC(MterpMathAbsLong, std::abs, GetVRegLong, SetJ);
+UNARY_INTRINSIC(MterpMathAbsLong, std::abs, GetVRegLong, SetJ);
+
 // java.lang.Math.abs(F)F
-UNARY_SIMPLE_INTRINSIC(MterpMathAbsFloat, 0x7fffffff&, GetVReg, SetI);
+UNARY_INTRINSIC(MterpMathAbsFloat, 0x7fffffff&, GetVReg, SetI);
+
 // java.lang.Math.abs(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathAbsDouble, INT64_C(0x7fffffffffffffff)&, GetVRegLong, SetJ);
+UNARY_INTRINSIC(MterpMathAbsDouble, INT64_C(0x7fffffffffffffff)&, GetVRegLong, SetJ);
+
 // java.lang.Math.sqrt(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathSqrt, std::sqrt, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathSqrt, std::sqrt, GetVRegDouble, SetD);
+
 // java.lang.Math.ceil(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathCeil, std::ceil, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathCeil, std::ceil, GetVRegDouble, SetD);
+
 // java.lang.Math.floor(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathFloor, std::floor, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathFloor, std::floor, GetVRegDouble, SetD);
+
 // java.lang.Math.sin(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathSin, std::sin, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathSin, std::sin, GetVRegDouble, SetD);
+
 // java.lang.Math.cos(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathCos, std::cos, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathCos, std::cos, GetVRegDouble, SetD);
+
 // java.lang.Math.tan(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathTan, std::tan, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathTan, std::tan, GetVRegDouble, SetD);
+
 // java.lang.Math.asin(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathAsin, std::asin, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathAsin, std::asin, GetVRegDouble, SetD);
+
 // java.lang.Math.acos(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathAcos, std::acos, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathAcos, std::acos, GetVRegDouble, SetD);
+
 // java.lang.Math.atan(D)D
-UNARY_SIMPLE_INTRINSIC(MterpMathAtan, std::atan, GetVRegDouble, SetD);
+UNARY_INTRINSIC(MterpMathAtan, std::atan, GetVRegDouble, SetD);
 
 // java.lang.String.charAt(I)C
 static ALWAYS_INLINE bool MterpStringCharAt(ShadowFrame* shadow_frame,
@@ -224,6 +320,12 @@
   return true;
 }
 
+// Macro to help keep track of what's left to implement.
+#define UNIMPLEMENTED_CASE(name)    \
+    case Intrinsics::k##name:       \
+      res = false;                  \
+      break;
+
 #define INTRINSIC_CASE(name)                                           \
     case Intrinsics::k##name:                                          \
       res = Mterp##name(shadow_frame, inst, inst_data, result_register); \
@@ -238,34 +340,136 @@
   Intrinsics intrinsic = static_cast<Intrinsics>(called_method->GetIntrinsic());
   bool res = false;  // Assume failure
   switch (intrinsic) {
-    INTRINSIC_CASE(MathMinIntInt)
-    INTRINSIC_CASE(MathMinLongLong)
-    INTRINSIC_CASE(MathMaxIntInt)
-    INTRINSIC_CASE(MathMaxLongLong)
-    INTRINSIC_CASE(MathAbsInt)
-    INTRINSIC_CASE(MathAbsLong)
-    INTRINSIC_CASE(MathAbsFloat)
+    UNIMPLEMENTED_CASE(DoubleDoubleToRawLongBits /* (D)J */)
+    UNIMPLEMENTED_CASE(DoubleDoubleToLongBits /* (D)J */)
+    UNIMPLEMENTED_CASE(DoubleIsInfinite /* (D)Z */)
+    UNIMPLEMENTED_CASE(DoubleIsNaN /* (D)Z */)
+    UNIMPLEMENTED_CASE(DoubleLongBitsToDouble /* (J)D */)
+    UNIMPLEMENTED_CASE(FloatFloatToRawIntBits /* (F)I */)
+    UNIMPLEMENTED_CASE(FloatFloatToIntBits /* (F)I */)
+    UNIMPLEMENTED_CASE(FloatIsInfinite /* (F)Z */)
+    UNIMPLEMENTED_CASE(FloatIsNaN /* (F)Z */)
+    UNIMPLEMENTED_CASE(FloatIntBitsToFloat /* (I)F */)
+    INTRINSIC_CASE(IntegerReverse)
+    INTRINSIC_CASE(IntegerReverseBytes)
+    INTRINSIC_CASE(IntegerBitCount)
+    INTRINSIC_CASE(IntegerCompare)
+    INTRINSIC_CASE(IntegerHighestOneBit)
+    INTRINSIC_CASE(IntegerLowestOneBit)
+    INTRINSIC_CASE(IntegerNumberOfLeadingZeros)
+    INTRINSIC_CASE(IntegerNumberOfTrailingZeros)
+    INTRINSIC_CASE(IntegerRotateRight)
+    INTRINSIC_CASE(IntegerRotateLeft)
+    INTRINSIC_CASE(IntegerSignum)
+    INTRINSIC_CASE(LongReverse)
+    INTRINSIC_CASE(LongReverseBytes)
+    INTRINSIC_CASE(LongBitCount)
+    INTRINSIC_CASE(LongCompare)
+    INTRINSIC_CASE(LongHighestOneBit)
+    INTRINSIC_CASE(LongLowestOneBit)
+    INTRINSIC_CASE(LongNumberOfLeadingZeros)
+    INTRINSIC_CASE(LongNumberOfTrailingZeros)
+    INTRINSIC_CASE(LongRotateRight)
+    INTRINSIC_CASE(LongRotateLeft)
+    INTRINSIC_CASE(LongSignum)
+    INTRINSIC_CASE(ShortReverseBytes)
     INTRINSIC_CASE(MathAbsDouble)
+    INTRINSIC_CASE(MathAbsFloat)
+    INTRINSIC_CASE(MathAbsLong)
+    INTRINSIC_CASE(MathAbsInt)
+    UNIMPLEMENTED_CASE(MathMinDoubleDouble /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathMinFloatFloat /* (FF)F */)
+    INTRINSIC_CASE(MathMinLongLong)
+    INTRINSIC_CASE(MathMinIntInt)
+    UNIMPLEMENTED_CASE(MathMaxDoubleDouble /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathMaxFloatFloat /* (FF)F */)
+    INTRINSIC_CASE(MathMaxLongLong)
+    INTRINSIC_CASE(MathMaxIntInt)
+    INTRINSIC_CASE(MathCos)
+    INTRINSIC_CASE(MathSin)
+    INTRINSIC_CASE(MathAcos)
+    INTRINSIC_CASE(MathAsin)
+    INTRINSIC_CASE(MathAtan)
+    UNIMPLEMENTED_CASE(MathAtan2 /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathCbrt /* (D)D */)
+    UNIMPLEMENTED_CASE(MathCosh /* (D)D */)
+    UNIMPLEMENTED_CASE(MathExp /* (D)D */)
+    UNIMPLEMENTED_CASE(MathExpm1 /* (D)D */)
+    UNIMPLEMENTED_CASE(MathHypot /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathLog /* (D)D */)
+    UNIMPLEMENTED_CASE(MathLog10 /* (D)D */)
+    UNIMPLEMENTED_CASE(MathNextAfter /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathSinh /* (D)D */)
+    INTRINSIC_CASE(MathTan)
+    UNIMPLEMENTED_CASE(MathTanh /* (D)D */)
     INTRINSIC_CASE(MathSqrt)
     INTRINSIC_CASE(MathCeil)
     INTRINSIC_CASE(MathFloor)
-    INTRINSIC_CASE(MathSin)
-    INTRINSIC_CASE(MathCos)
-    INTRINSIC_CASE(MathTan)
-    INTRINSIC_CASE(MathAsin)
-    INTRINSIC_CASE(MathAcos)
-    INTRINSIC_CASE(MathAtan)
+    UNIMPLEMENTED_CASE(MathRint /* (D)D */)
+    UNIMPLEMENTED_CASE(MathRoundDouble /* (D)J */)
+    UNIMPLEMENTED_CASE(MathRoundFloat /* (F)I */)
+    UNIMPLEMENTED_CASE(SystemArrayCopyChar /* ([CI[CII)V */)
+    UNIMPLEMENTED_CASE(SystemArrayCopy /* (Ljava/lang/Object;ILjava/lang/Object;II)V */)
+    UNIMPLEMENTED_CASE(ThreadCurrentThread /* ()Ljava/lang/Thread; */)
+    UNIMPLEMENTED_CASE(MemoryPeekByte /* (J)B */)
+    UNIMPLEMENTED_CASE(MemoryPeekIntNative /* (J)I */)
+    UNIMPLEMENTED_CASE(MemoryPeekLongNative /* (J)J */)
+    UNIMPLEMENTED_CASE(MemoryPeekShortNative /* (J)S */)
+    UNIMPLEMENTED_CASE(MemoryPokeByte /* (JB)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeIntNative /* (JI)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeLongNative /* (JJ)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeShortNative /* (JS)V */)
     INTRINSIC_CASE(StringCharAt)
     INTRINSIC_CASE(StringCompareTo)
-    INTRINSIC_CASE(StringIndexOf)
-    INTRINSIC_CASE(StringIndexOfAfter)
     INTRINSIC_CASE(StringEquals)
     INTRINSIC_CASE(StringGetCharsNoCheck)
+    INTRINSIC_CASE(StringIndexOf)
+    INTRINSIC_CASE(StringIndexOfAfter)
+    UNIMPLEMENTED_CASE(StringStringIndexOf /* (Ljava/lang/String;)I */)
+    UNIMPLEMENTED_CASE(StringStringIndexOfAfter /* (Ljava/lang/String;I)I */)
     INTRINSIC_CASE(StringIsEmpty)
     INTRINSIC_CASE(StringLength)
-    default:
-      res = false;  // Punt
+    UNIMPLEMENTED_CASE(StringNewStringFromBytes /* ([BIII)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringNewStringFromChars /* (II[C)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringNewStringFromString /* (Ljava/lang/String;)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringBufferAppend /* (Ljava/lang/String;)Ljava/lang/StringBuffer; */)
+    UNIMPLEMENTED_CASE(StringBufferLength /* ()I */)
+    UNIMPLEMENTED_CASE(StringBufferToString /* ()Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringBuilderAppend /* (Ljava/lang/String;)Ljava/lang/StringBuilder; */)
+    UNIMPLEMENTED_CASE(StringBuilderLength /* ()I */)
+    UNIMPLEMENTED_CASE(StringBuilderToString /* ()Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(UnsafeCASInt /* (Ljava/lang/Object;JII)Z */)
+    UNIMPLEMENTED_CASE(UnsafeCASLong /* (Ljava/lang/Object;JJJ)Z */)
+    UNIMPLEMENTED_CASE(UnsafeCASObject /* (Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z */)
+    UNIMPLEMENTED_CASE(UnsafeGet /* (Ljava/lang/Object;J)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetVolatile /* (Ljava/lang/Object;J)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetObject /* (Ljava/lang/Object;J)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeGetObjectVolatile /* (Ljava/lang/Object;J)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeGetLong /* (Ljava/lang/Object;J)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetLongVolatile /* (Ljava/lang/Object;J)J */)
+    UNIMPLEMENTED_CASE(UnsafePut /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutOrdered /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutVolatile /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObject /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObjectOrdered /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObjectVolatile /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLong /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLongOrdered /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLongVolatile /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndAddInt /* (Ljava/lang/Object;JI)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndAddLong /* (Ljava/lang/Object;JJ)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetInt /* (Ljava/lang/Object;JI)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetLong /* (Ljava/lang/Object;JJ)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetObject /* (Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeLoadFence /* ()V */)
+    UNIMPLEMENTED_CASE(UnsafeStoreFence /* ()V */)
+    UNIMPLEMENTED_CASE(UnsafeFullFence /* ()V */)
+    UNIMPLEMENTED_CASE(ReferenceGetReferent /* ()Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(IntegerValueOf /* (I)Ljava/lang/Integer; */)
+    case Intrinsics::kNone:
+      res = false;
       break;
+    // Note: no default case to ensure we catch any newly added intrinsics.
   }
   return res;
 }
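To make the macro layering concrete, this is approximately what BINARY_II_INTRINSIC(MterpIntegerRotateLeft, (Rot<int32_t, true>), SetI) expands to once both macros are applied; a sketch of the preprocessor output, not code that appears in the tree.

    static ALWAYS_INLINE bool MterpIntegerRotateLeft(ShadowFrame* shadow_frame,
                                                     const Instruction* inst,
                                                     uint16_t inst_data,
                                                     JValue* result_register)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      uint32_t arg[Instruction::kMaxVarArgRegs] = {};
      inst->GetVarArgs(arg, inst_data);
      // get1/get2 were GetVReg(arg[0])/GetVReg(arg[1]); op was (Rot<int32_t, true>).
      result_register->SetI(
          (Rot<int32_t, true>)(shadow_frame->GetVReg(arg[0]),
                               shadow_frame->GetVReg(arg[1])));
      return true;
    }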
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 420fa85..70be30c 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1289,12 +1289,14 @@
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   jchar old_c = shadow_frame->GetVReg(arg_offset + 1);
   jchar new_c = shadow_frame->GetVReg(arg_offset + 2);
-  ObjPtr<mirror::String> string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::String> string =
+      hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsString());
   if (string == nullptr) {
     AbortTransactionOrFail(self, "String.replaceWithMatch with null object");
     return;
   }
-  result->SetL(string->DoReplace(self, old_c, new_c));
+  result->SetL(mirror::String::DoReplace(self, string, old_c, new_c));
 }
 
 // This allows creating the new style of String objects during compilation.
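The switch to a Handle here matters because DoReplace can allocate a new string, and allocation may trigger a moving GC. A short annotated restatement of the pattern, with variables as in the hunk above:

    // Pin the reference in a handle across a potentially allocating call.
    StackHandleScope<1> hs(self);
    Handle<mirror::String> string =
        hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsString());
    // If the referent moves, the GC updates the handle; a raw ObjPtr held
    // across DoReplace could be left dangling.
    result->SetL(mirror::String::DoReplace(self, string, old_c, new_c));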
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index e7b23dc..fc41f94 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -211,6 +211,7 @@
 uint8_t* JitCodeCache::CommitCode(Thread* self,
                                   ArtMethod* method,
                                   uint8_t* stack_map,
+                                  uint8_t* method_info,
                                   uint8_t* roots_data,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
@@ -225,6 +226,7 @@
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        stack_map,
+                                       method_info,
                                        roots_data,
                                        frame_size_in_bytes,
                                        core_spill_mask,
@@ -242,6 +244,7 @@
     result = CommitCodeInternal(self,
                                 method,
                                 stack_map,
+                                method_info,
                                 roots_data,
                                 frame_size_in_bytes,
                                 core_spill_mask,
@@ -510,6 +513,7 @@
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
+                                          uint8_t* method_info,
                                           uint8_t* roots_data,
                                           size_t frame_size_in_bytes,
                                           size_t core_spill_mask,
@@ -547,6 +551,7 @@
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
           code_ptr - stack_map,
+          code_ptr - method_info,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
@@ -739,12 +744,14 @@
 
 size_t JitCodeCache::ReserveData(Thread* self,
                                  size_t stack_map_size,
+                                 size_t method_info_size,
                                  size_t number_of_roots,
                                  ArtMethod* method,
                                  uint8_t** stack_map_data,
+                                 uint8_t** method_info_data,
                                  uint8_t** roots_data) {
   size_t table_size = ComputeRootTableSize(number_of_roots);
-  size_t size = RoundUp(stack_map_size + table_size, sizeof(void*));
+  size_t size = RoundUp(stack_map_size + method_info_size + table_size, sizeof(void*));
   uint8_t* result = nullptr;
 
   {
@@ -774,11 +781,13 @@
   if (result != nullptr) {
     *roots_data = result;
     *stack_map_data = result + table_size;
+    *method_info_data = *stack_map_data + stack_map_size;
     FillRootTableLength(*roots_data, number_of_roots);
     return size;
   } else {
     *roots_data = nullptr;
     *stack_map_data = nullptr;
+    *method_info_data = nullptr;
     return 0;
   }
 }
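ReserveData now carves the single allocation into three consecutive ranges. A small sketch of the pointer arithmetic; the struct and function are hypothetical, introduced only to illustrate the layout:

    #include <cstddef>
    #include <cstdint>

    // Layout: [ root table ][ stack maps ][ method info ], with the total
    // rounded up to pointer size as in ReserveData above.
    struct JitDataSlices {
      uint8_t* roots_data;
      uint8_t* stack_map_data;
      uint8_t* method_info_data;
    };

    JitDataSlices SliceReservation(uint8_t* result,
                                   size_t table_size,
                                   size_t stack_map_size) {
      JitDataSlices s;
      s.roots_data = result;
      s.stack_map_data = result + table_size;
      s.method_info_data = s.stack_map_data + stack_map_size;
      return s;
    }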
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index c970979..db214e7 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -105,6 +105,7 @@
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
                       uint8_t* stack_map,
+                      uint8_t* method_info,
                       uint8_t* roots_data,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
@@ -129,10 +130,12 @@
   // for storing `number_of_roots` roots. Returns null if there is no more room.
   // Return the number of bytes allocated.
   size_t ReserveData(Thread* self,
-                     size_t size,
+                     size_t stack_map_size,
+                     size_t method_info_size,
                      size_t number_of_roots,
                      ArtMethod* method,
                      uint8_t** stack_map_data,
+                     uint8_t** method_info_data,
                      uint8_t** roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -249,6 +252,7 @@
   uint8_t* CommitCodeInternal(Thread* self,
                               ArtMethod* method,
                               uint8_t* stack_map,
+                              uint8_t* method_info,
                               uint8_t* roots_data,
                               size_t frame_size_in_bytes,
                               size_t core_spill_mask,
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 2724b00..e2bd1cb 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -121,15 +121,16 @@
       break;
     }
 
-    uint16_t new_methods = 0;
+    uint16_t number_of_new_methods = 0;
     uint64_t start_work = NanoTime();
-    bool profile_saved_to_disk = ProcessProfilingInfo(&new_methods);
+    bool profile_saved_to_disk = ProcessProfilingInfo(/*force_save*/false, &number_of_new_methods);
     // Update the notification counter based on the result. Note that there might be contention
     // on this counter, but we don't need it to be 100% precise.
     if (!profile_saved_to_disk) {
       // If we didn't save to disk it may be because we didn't have enough new methods.
-      // Set the jit activity notifications to new_methods so we can wake up earlier if needed.
-      jit_activity_notifications_ = new_methods;
+      // Set the jit activity notifications to number_of_new_methods so we can wake up earlier
+      // if needed.
+      jit_activity_notifications_ = number_of_new_methods;
     }
     total_ns_of_work_ += NanoTime() - start_work;
   }
@@ -256,7 +257,7 @@
       total_number_of_profile_entries_cached);
 }
 
-bool ProfileSaver::ProcessProfilingInfo(uint16_t* new_methods) {
+bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
@@ -267,10 +268,16 @@
 
   bool profile_file_saved = false;
   uint64_t total_number_of_profile_entries_cached = 0;
-  *new_methods = 0;
+  if (number_of_new_methods != nullptr) {
+    *number_of_new_methods = 0;
+  }
 
   for (const auto& it : tracked_locations) {
-    if (ShuttingDown(Thread::Current())) {
+    if (!force_save && ShuttingDown(Thread::Current())) {
+      // The ProfileSaver is in shutdown mode, meaning a stop request was made and
+      // we need to exit cleanly (by waiting for the saver thread to finish). Unless
+      // we have a request for a forced save, do not do any processing so that we
+      // speed up the exit.
       return true;
     }
     const std::string& filename = it.first;
@@ -292,7 +299,8 @@
         cached_profile->GetNumberOfResolvedClasses() -
         static_cast<int64_t>(cached_info->last_save_number_of_classes);
 
-    if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
+    if (!force_save &&
+        delta_number_of_methods < options_.GetMinMethodsToSave() &&
         delta_number_of_classes < options_.GetMinClassesToSave()) {
       VLOG(profiler) << "Not enough information to save to: " << filename
           << " Number of methods: " << delta_number_of_methods
@@ -300,7 +308,10 @@
       total_number_of_skipped_writes_++;
       continue;
     }
-    *new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods), *new_methods);
+    if (number_of_new_methods != nullptr) {
+      *number_of_new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods),
+                                        *number_of_new_methods);
+    }
     uint64_t bytes_written;
     // Force the save. In case the profile data is corrupted or the profile
     // has the wrong version, this will "fix" the file to the correct format.
@@ -454,6 +465,9 @@
   // Wait for the saver thread to stop.
   CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profile saver thread shutdown");
 
+  // Force save everything before destroying the instance.
+  instance_->ProcessProfilingInfo(/*force_save*/true, /*number_of_new_methods*/nullptr);
+
   {
     MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
     instance_ = nullptr;
@@ -516,8 +530,7 @@
   // but we only use this in testing when we know this won't happen.
   // Refactor the way we handle the instance so that we don't end up in this situation.
   if (saver != nullptr) {
-    uint16_t new_methods;
-    saver->ProcessProfilingInfo(&new_methods);
+    saver->ProcessProfilingInfo(/*force_save*/true, /*number_of_new_methods*/nullptr);
   }
 }
 
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 8e0682d..4dd8e60 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -79,9 +79,14 @@
 
   // The run loop for the saver.
   void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+
   // Processes the existing profiling info from the jit code cache and returns
   // true if it needed to be saved to disk.
-  bool ProcessProfilingInfo(uint16_t* new_methods)
+  // If number_of_new_methods is not null, after the call it will contain the number of new methods
+  // written to disk.
+  // If force_save is true, the saver will ignore any constraints which limit IO (e.g. will write
+  // the profile to disk even if it's just one new method).
+  bool ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods)
     REQUIRES(!Locks::profiler_lock_)
     REQUIRES(!Locks::mutator_lock_);
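The two call modes of the revised API, as used by the hunks in profile_saver.cc above (a sketch; access control and class context are elided):

    void PeriodicSave(ProfileSaver* saver) {
      // Periodic mode: the min-methods/min-classes thresholds apply, and
      // the reported count feeds the run loop's early wake-up logic.
      uint16_t number_of_new_methods = 0;
      saver->ProcessProfilingInfo(/*force_save*/ false, &number_of_new_methods);
    }

    void ShutdownSave(ProfileSaver* saver) {
      // Shutdown mode: ignore the IO-limiting constraints and flush
      // everything; the out-parameter may be null if the count is unused.
      saver->ProcessProfilingInfo(/*force_save*/ true, /*number_of_new_methods*/ nullptr);
    }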
 
diff --git a/runtime/method_info.h b/runtime/method_info.h
new file mode 100644
index 0000000..5a72125
--- /dev/null
+++ b/runtime/method_info.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_INFO_H_
+#define ART_RUNTIME_METHOD_INFO_H_
+
+#include "base/logging.h"
+#include "leb128.h"
+#include "memory_region.h"
+
+namespace art {
+
+// MethodInfo stores a method's data that is not dedupe-friendly; currently it only holds method
+// indices. Putting this data in MethodInfo instead of in the CodeInfo saves ~5% oat size.
+class MethodInfo {
+  using MethodIndexType = uint16_t;
+
+ public:
+  // Reading mode: decodes the LEB128-encoded count, then wraps the index table.
+  explicit MethodInfo(const uint8_t* ptr) {
+    if (ptr != nullptr) {
+      num_method_indices_ = DecodeUnsignedLeb128(&ptr);
+      region_ = MemoryRegion(const_cast<uint8_t*>(ptr),
+                             num_method_indices_ * sizeof(MethodIndexType));
+    }
+  }
+
+  // Writing mode: encodes the count, then exposes the index table for writing.
+  MethodInfo(uint8_t* ptr, size_t num_method_indices) : num_method_indices_(num_method_indices) {
+    DCHECK(ptr != nullptr);
+    ptr = EncodeUnsignedLeb128(ptr, num_method_indices_);
+    region_ = MemoryRegion(ptr, num_method_indices_ * sizeof(MethodIndexType));
+  }
+
+  static size_t ComputeSize(size_t num_method_indices) {
+    uint8_t temp[8];
+    uint8_t* ptr = temp;
+    ptr = EncodeUnsignedLeb128(ptr, num_method_indices);
+    return (ptr - temp) + num_method_indices * sizeof(MethodIndexType);
+  }
+
+  ALWAYS_INLINE MethodIndexType GetMethodIndex(size_t index) const {
+    // Use bit functions to avoid pesky alignment requirements.
+    return region_.LoadBits(index * BitSizeOf<MethodIndexType>(), BitSizeOf<MethodIndexType>());
+  }
+
+  void SetMethodIndex(size_t index, MethodIndexType method_index) {
+    region_.StoreBits(index * BitSizeOf<MethodIndexType>(),
+                      method_index,
+                      BitSizeOf<MethodIndexType>());
+  }
+
+  size_t NumMethodIndices() const {
+    return num_method_indices_;
+  }
+
+ private:
+  size_t num_method_indices_ = 0u;
+  MemoryRegion region_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_INFO_H_
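A write-then-read round trip over the new class, as a hedged standalone sketch; it assumes only method_info.h above, and in practice the deduplicated indices come from StackMapStream::PrepareMethodIndices:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>
    #include "method_info.h"  // Assumed include path.

    void RoundTrip() {
      const std::vector<uint16_t> indices = {3u, 7u, 42u};  // Already deduplicated.
      std::vector<uint8_t> storage(art::MethodInfo::ComputeSize(indices.size()));
      art::MethodInfo writer(storage.data(), indices.size());  // Writing mode.
      for (size_t i = 0; i < indices.size(); ++i) {
        writer.SetMethodIndex(i, indices[i]);
      }
      const art::MethodInfo reader(storage.data());  // Reading mode decodes the LEB128 count.
      assert(reader.NumMethodIndices() == indices.size());
      for (size_t i = 0; i < reader.NumMethodIndices(); ++i) {
        assert(reader.GetMethodIndex(i) == indices[i]);
      }
    }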
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index f56226b..04c80c5 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -402,8 +402,8 @@
     return (T)static_cast<uintptr_t>(
         AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
-  return (T)static_cast<uintptr_t>(
-      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
+  return (T)static_cast<uintptr_t>(static_cast<uint32_t>(
+      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx)));
 }
 
 template<bool kTransactionActive, bool kUnchecked>
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index f6dc551..003b03b 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -29,6 +29,7 @@
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
+#include "class_ext-inl.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
@@ -83,6 +84,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline ClassExt* Class::GetExtData() {
+  return GetFieldObject<ClassExt, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline DexCache* Class::GetDexCache() {
   return GetFieldObject<DexCache, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_));
@@ -951,6 +958,10 @@
   for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
   }
+  ObjPtr<ClassExt> ext(GetExtData<kDefaultVerifyFlags, kReadBarrierOption>());
+  if (!ext.IsNull()) {
+    ext->VisitNativeRoots<kReadBarrierOption, Visitor>(visitor, pointer_size);
+  }
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(PointerSize pointer_size) {
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index efb8710..26af488 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -64,10 +64,6 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-ClassExt* Class::GetExtData() {
-  return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
-}
-
 ClassExt* Class::EnsureExtDataPresent(Thread* self) {
   ObjPtr<ClassExt> existing(GetExtData());
   if (!existing.IsNull()) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 34d5bf3..27aecd5 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1162,6 +1162,8 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ClassExt* GetExtData() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the ExtData for this class, allocating one if necessary. This should be the only way
diff --git a/runtime/mirror/class_ext-inl.h b/runtime/mirror/class_ext-inl.h
new file mode 100644
index 0000000..feaac85
--- /dev/null
+++ b/runtime/mirror/class_ext-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
+#define ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
+
+#include "class_ext.h"
+
+#include "art_method-inl.h"
+
+namespace art {
+namespace mirror {
+
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
+void ClassExt::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
+  ObjPtr<PointerArray> arr(GetObsoleteMethods<kDefaultVerifyFlags, kReadBarrierOption>());
+  if (arr.IsNull()) {
+    return;
+  }
+  int32_t len = arr->GetLength();
+  for (int32_t i = 0; i < len; i++) {
+    ArtMethod* method = arr->GetElementPtrSize<ArtMethod*,
+                                               kDefaultVerifyFlags,
+                                               kReadBarrierOption>(i, pointer_size);
+    if (method != nullptr) {
+      method->VisitRoots<kReadBarrierOption>(visitor, pointer_size);
+    }
+  }
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
index 7270079..5dc3aca 100644
--- a/runtime/mirror/class_ext.cc
+++ b/runtime/mirror/class_ext.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "class_ext.h"
+#include "class_ext-inl.h"
 
 #include "art_method-inl.h"
 #include "base/casts.h"
@@ -24,7 +24,6 @@
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
 #include "object_array.h"
-#include "object_array-inl.h"
 #include "stack_trace_element.h"
 #include "utils.h"
 #include "well_known_classes.h"
@@ -34,6 +33,11 @@
 
 GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
 
+uint32_t ClassExt::ClassSize(PointerSize pointer_size) {
+  uint32_t vtable_entries = Object::kVTableLength;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
+}
+
 void ClassExt::SetObsoleteArrays(ObjPtr<PointerArray> methods,
                                  ObjPtr<ObjectArray<DexCache>> dex_caches) {
   DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index ad8a61b..fac955a 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -17,9 +17,8 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_EXT_H_
 #define ART_RUNTIME_MIRROR_CLASS_EXT_H_
 
-#include "class-inl.h"
-
 #include "array.h"
+#include "class.h"
 #include "dex_cache.h"
 #include "gc_root.h"
 #include "object.h"
@@ -36,10 +35,7 @@
 // C++ mirror of dalvik.system.ClassExt
 class MANAGED ClassExt : public Object {
  public:
-  static uint32_t ClassSize(PointerSize pointer_size) {
-    uint32_t vtable_entries = Object::kVTableLength;
-    return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
-  }
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of dalvik.system.ClassExt.
   static constexpr uint32_t InstanceSize() {
@@ -57,8 +53,11 @@
         OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_));
   }
 
-  PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  inline PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
+        OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
   }
 
   ByteArray* GetOriginalDexFileBytes() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -78,6 +77,10 @@
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, class Visitor>
+  inline void VisitNativeRoots(Visitor& visitor, PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static ClassExt* Alloc(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 4541ce2..f7ab26d 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -538,10 +538,10 @@
                                          PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (pointer_size == PointerSize::k32) {
-      intptr_t ptr  = reinterpret_cast<intptr_t>(new_value);
-      DCHECK_EQ(static_cast<int32_t>(ptr), ptr);  // Check that we dont lose any non 0 bits.
+      uintptr_t ptr  = reinterpret_cast<uintptr_t>(new_value);
+      DCHECK_EQ(static_cast<uint32_t>(ptr), ptr);  // Check that we dont lose any non 0 bits.
       SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-          field_offset, static_cast<int32_t>(ptr));
+          field_offset, static_cast<int32_t>(static_cast<uint32_t>(ptr)));
     } else {
       SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
           field_offset, reinterpret_cast64<int64_t>(new_value));
@@ -591,7 +591,8 @@
   ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (pointer_size == PointerSize::k32) {
-      return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
+      uint64_t address = static_cast<uint32_t>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
+      return reinterpret_cast<T>(static_cast<uintptr_t>(address));
     } else {
       int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
       return reinterpret_cast64<T>(v);
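A sketch (not from the patch) of why the switch to unsigned arithmetic above matters on a 64-bit two's-complement host handling 32-bit pointers: a pointer value such as 0x80000000 fits in 32 unsigned bits, but the old signed check sign-extended it to a negative value and rejected it.

  uintptr_t ptr = 0x80000000u;
  bool old_check = static_cast<int32_t>(ptr) == static_cast<intptr_t>(ptr);  // false on 64-bit hosts.
  bool new_check = static_cast<uint32_t>(ptr) == ptr;                        // true: lossless round trip.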
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 884b88a..de0e75b 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -89,16 +89,17 @@
   return true;
 }
 
-ObjPtr<String> String::DoReplace(Thread* self, uint16_t old_c, uint16_t new_c) {
-  DCHECK(IsCompressed() ? ContainsElement(ArrayRef<uint8_t>(value_compressed_, GetLength()), old_c)
-                        : ContainsElement(ArrayRef<uint16_t>(value_, GetLength()), old_c));
-  int32_t length = GetLength();
+ObjPtr<String> String::DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c) {
+  int32_t length = src->GetLength();
+  DCHECK(src->IsCompressed()
+             ? ContainsElement(ArrayRef<uint8_t>(src->value_compressed_, length), old_c)
+             : ContainsElement(ArrayRef<uint16_t>(src->value_, length), old_c));
   bool compressible =
       kUseStringCompression &&
       IsASCII(new_c) &&
-      (IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(value_, length, old_c)));
+      (src->IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(src->value_, length, old_c)));
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  const int32_t length_with_flag = String::GetFlaggedCount(GetLength(), compressible);
+  const int32_t length_with_flag = String::GetFlaggedCount(length, compressible);
   SetStringCountVisitor visitor(length_with_flag);
   ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
@@ -109,10 +110,10 @@
       return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
     };
     uint8_t* out = string->value_compressed_;
-    if (LIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
-      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    if (LIKELY(src->IsCompressed())) {  // LIKELY(compressible == src->IsCompressed())
+      std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
     } else {
-      std::transform(value_, value_ + length, out, replace);
+      std::transform(src->value_, src->value_ + length, out, replace);
     }
     DCHECK(kUseStringCompression && AllASCII(out, length));
   } else {
@@ -120,10 +121,10 @@
       return (old_c != c) ? c : new_c;
     };
     uint16_t* out = string->value_;
-    if (UNLIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
-      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    if (UNLIKELY(src->IsCompressed())) {  // LIKELY(compressible == src->IsCompressed())
+      std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
     } else {
-      std::transform(value_, value_ + length, out, replace);
+      std::transform(src->value_, src->value_ + length, out, replace);
     }
     DCHECK(!kUseStringCompression || !AllASCII(out, length));
   }
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index dbb5a4c..b59bbfb 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -96,7 +96,7 @@
 
   // Create a new string where all occurences of `old_c` are replaced with `new_c`.
   // String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
-  ObjPtr<String> DoReplace(Thread* self, uint16_t old_c, uint16_t new_c)
+  static ObjPtr<String> DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 2e561ff..bf33bf2 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -101,8 +101,9 @@
 
 static jstring String_doReplace(JNIEnv* env, jobject java_this, jchar old_c, jchar new_c) {
   ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::String> result =
-      soa.Decode<mirror::String>(java_this)->DoReplace(soa.Self(), old_c, new_c);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string = hs.NewHandle(soa.Decode<mirror::String>(java_this));
+  ObjPtr<mirror::String> result = mirror::String::DoReplace(soa.Self(), string, old_c, new_c);
   return soa.AddLocalReference<jstring>(result);
 }
 
diff --git a/runtime/oat.h b/runtime/oat.h
index df43107..190d533 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '5', '\0' };  // hash-based DexCache fields
+  static constexpr uint8_t kOatVersion[] = { '1', '1', '6', '\0' };  // Add method infos.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index b4e4285..8eef586 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -22,13 +22,14 @@
 
 namespace art {
 
-OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t vmap_table_offset,
-    uint32_t frame_size_in_bytes,
-    uint32_t core_spill_mask,
-    uint32_t fp_spill_mask,
-    uint32_t code_size)
+OatQuickMethodHeader::OatQuickMethodHeader(uint32_t vmap_table_offset,
+                                           uint32_t method_info_offset,
+                                           uint32_t frame_size_in_bytes,
+                                           uint32_t core_spill_mask,
+                                           uint32_t fp_spill_mask,
+                                           uint32_t code_size)
     : vmap_table_offset_(vmap_table_offset),
+      method_info_offset_(method_info_offset),
       frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
       code_size_(code_size) {}
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 3cdde5a..f2a2af2 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -20,6 +20,7 @@
 #include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "quick/quick_method_frame_info.h"
+#include "method_info.h"
 #include "stack_map.h"
 #include "utils.h"
 
@@ -30,11 +31,13 @@
 // OatQuickMethodHeader precedes the raw code chunk generated by the compiler.
 class PACKED(4) OatQuickMethodHeader {
  public:
-  explicit OatQuickMethodHeader(uint32_t vmap_table_offset = 0U,
-                                uint32_t frame_size_in_bytes = 0U,
-                                uint32_t core_spill_mask = 0U,
-                                uint32_t fp_spill_mask = 0U,
-                                uint32_t code_size = 0U);
+  OatQuickMethodHeader() = default;
+  explicit OatQuickMethodHeader(uint32_t vmap_table_offset,
+                                uint32_t method_info_offset,
+                                uint32_t frame_size_in_bytes,
+                                uint32_t core_spill_mask,
+                                uint32_t fp_spill_mask,
+                                uint32_t code_size);
 
   ~OatQuickMethodHeader();
 
@@ -63,8 +66,7 @@
 
   const void* GetOptimizedCodeInfoPtr() const {
     DCHECK(IsOptimized());
-    const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
-    return data;
+    return reinterpret_cast<const void*>(code_ - vmap_table_offset_);
   }
 
   uint8_t* GetOptimizedCodeInfoPtr() {
@@ -76,6 +78,20 @@
     return CodeInfo(GetOptimizedCodeInfoPtr());
   }
 
+  const void* GetOptimizedMethodInfoPtr() const {
+    DCHECK(IsOptimized());
+    return reinterpret_cast<const void*>(code_ - method_info_offset_);
+  }
+
+  uint8_t* GetOptimizedMethodInfoPtr() {
+    DCHECK(IsOptimized());
+    return code_ - method_info_offset_;
+  }
+
+  MethodInfo GetOptimizedMethodInfo() const {
+    return MethodInfo(reinterpret_cast<const uint8_t*>(GetOptimizedMethodInfoPtr()));
+  }
+
   const uint8_t* GetCode() const {
     return code_;
   }
@@ -100,6 +116,18 @@
     return &vmap_table_offset_;
   }
 
+  uint32_t GetMethodInfoOffset() const {
+    return method_info_offset_;
+  }
+
+  void SetMethodInfoOffset(uint32_t offset) {
+    method_info_offset_ = offset;
+  }
+
+  const uint32_t* GetMethodInfoOffsetAddr() const {
+    return &method_info_offset_;
+  }
+
   const uint8_t* GetVmapTable() const {
     CHECK(!IsOptimized()) << "Unimplemented vmap table for optimizing compiler";
     return (vmap_table_offset_ == 0) ? nullptr : code_ - vmap_table_offset_;
@@ -160,12 +188,17 @@
   static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
 
   // The offset in bytes from the start of the vmap table to the end of the header.
-  uint32_t vmap_table_offset_;
+  uint32_t vmap_table_offset_ = 0u;
+  // The offset in bytes from the start of the method info to the end of the header.
+  // The method info offset is not stored in the CodeInfo because that would hurt CodeInfo's good
+  // dedupe properties. The method info memory region contains method indices, which are hard to
+  // dedupe.
+  uint32_t method_info_offset_ = 0u;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
   // The code size in bytes. The highest bit is used to signify if the compiled
   // code with the method header has should_deoptimize flag.
-  uint32_t code_size_;
+  uint32_t code_size_ = 0u;
   // The actual code.
   uint8_t code_[0];
 };
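A minimal sketch of the memory layout the two backward offsets describe, and of locating the method info from a header (assuming the method info sits directly before the vmap table):

  //   [ MethodInfo ][ vmap table / CodeInfo ][ OatQuickMethodHeader ][ code... ]
  //                                                                   ^ code_
  const void* LocateMethodInfo(const OatQuickMethodHeader* header) {
    // Both offsets count backwards from the first code byte.
    return header->GetCode() - header->GetMethodInfoOffset();
  }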
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 3c64d40..87bc7df 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -671,14 +671,14 @@
     soa.Self()->ClearException();
     jclass exception_class = soa.Env()->FindClass("java/lang/reflect/InvocationTargetException");
     if (exception_class == nullptr) {
-      soa.Self()->AssertPendingOOMException();
+      soa.Self()->AssertPendingException();
       return nullptr;
     }
     jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
     CHECK(mid != nullptr);
     jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
     if (exception_instance == nullptr) {
-      soa.Self()->AssertPendingOOMException();
+      soa.Self()->AssertPendingException();
       return nullptr;
     }
     soa.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index e4d9aa1..13370a0 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -285,6 +285,13 @@
     LOG(WARNING) << "Current thread not detached in Runtime shutdown";
   }
 
+  if (jit_ != nullptr) {
+    // Stop the profile saver thread before marking the runtime as shutting down.
+    // The saver will try to dump the profiles before being stopped and that
+    // requires holding the mutator lock.
+    jit_->StopProfileSaver();
+  }
+
   {
     ScopedTrace trace2("Wait for shutdown cond");
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
@@ -326,8 +333,6 @@
     // Delete thread pool before the thread list since we don't want to wait forever on the
     // JIT compiler threads.
     jit_->DeleteThreadPool();
-    // Similarly, stop the profile saver thread before deleting the thread list.
-    jit_->StopProfileSaver();
   }
 
   // TODO Maybe do some locking.
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 51a24e4..0628643 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -142,8 +142,10 @@
       InlineInfo inline_info = GetCurrentInlineInfo();
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
       CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      MethodInfo method_info = method_header->GetOptimizedMethodInfo();
       DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
       return GetResolvedMethod(*GetCurrentQuickFrame(),
+                               method_info,
                                inline_info,
                                encoding.inline_info.encoding,
                                depth_in_stack_map);
diff --git a/runtime/stack.h b/runtime/stack.h
index 90a0aee..5c9614a 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -197,6 +197,11 @@
     return *reinterpret_cast<const int32_t*>(vreg);
   }
 
+  // Shorts are extended to ints in vregs. Interpreter intrinsics need them as shorts.
+  int16_t GetVRegShort(size_t i) const {
+    return static_cast<int16_t>(GetVReg(i));
+  }
+
   uint32_t* GetVRegAddr(size_t i) {
     return &vregs_[i];
   }
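For illustration (not in the patch): vregs widen shorts to 32-bit ints, so the accessor above recovers the short by truncation, which is value-preserving for anything that originated as a short.

  int32_t vreg = -32768;                   // A short value stored sign-extended in a vreg.
  int16_t s = static_cast<int16_t>(vreg);  // Reads back as -32768.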
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index d657311..250ff2a 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -118,7 +118,8 @@
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     bool dump_stack_maps,
-                    InstructionSet instruction_set) const {
+                    InstructionSet instruction_set,
+                    const MethodInfo& method_info) const {
   CodeInfoEncoding encoding = ExtractEncoding();
   size_t number_of_stack_maps = GetNumberOfStackMaps(encoding);
   vios->Stream()
@@ -139,6 +140,7 @@
       stack_map.Dump(vios,
                      *this,
                      encoding,
+                     method_info,
                      code_offset,
                      number_of_dex_registers,
                      instruction_set,
@@ -189,6 +191,7 @@
 void StackMap::Dump(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
                     const CodeInfoEncoding& encoding,
+                    const MethodInfo& method_info,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     InstructionSet instruction_set,
@@ -222,12 +225,13 @@
     // We do not know the length of the dex register maps of inlined frames
     // at this level, so we just pass null to `InlineInfo::Dump` to tell
     // it not to look at these maps.
-    inline_info.Dump(vios, code_info, nullptr);
+    inline_info.Dump(vios, code_info, method_info, nullptr);
   }
 }
 
 void InlineInfo::Dump(VariableIndentationOutputStream* vios,
                       const CodeInfo& code_info,
+                      const MethodInfo& method_info,
                       uint16_t number_of_dex_registers[]) const {
   InlineInfoEncoding inline_info_encoding = code_info.ExtractEncoding().inline_info.encoding;
   vios->Stream() << "InlineInfo with depth "
@@ -245,7 +249,7 @@
     } else {
       vios->Stream()
           << std::dec
-          << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, i);
+          << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, method_info, i);
     }
     vios->Stream() << ")\n";
     if (HasDexRegisterMapAtDepth(inline_info_encoding, i) && (number_of_dex_registers != nullptr)) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index d936ce9..ffa17c9 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -23,6 +23,7 @@
 #include "bit_memory_region.h"
 #include "dex_file.h"
 #include "memory_region.h"
+#include "method_info.h"
 #include "leb128.h"
 
 namespace art {
@@ -367,7 +368,8 @@
     return region_.size();
   }
 
-  void Dump(VariableIndentationOutputStream* vios, const CodeInfo& code_info);
+  void Dump(VariableIndentationOutputStream* vios,
+            const CodeInfo& code_info);
 
   // Special (invalid) Dex register location catalog entry index meaning
   // that there is no location for a given Dex register (i.e., it is
@@ -862,6 +864,7 @@
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& code_info,
             const CodeInfoEncoding& encoding,
+            const MethodInfo& method_info,
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             InstructionSet instruction_set,
@@ -885,12 +888,12 @@
 
 class InlineInfoEncoding {
  public:
-  void SetFromSizes(size_t method_index_max,
+  void SetFromSizes(size_t method_index_idx_max,
                     size_t dex_pc_max,
                     size_t extra_data_max,
                     size_t dex_register_map_size) {
     total_bit_size_ = kMethodIndexBitOffset;
-    total_bit_size_ += MinimumBitsToStore(method_index_max);
+    total_bit_size_ += MinimumBitsToStore(method_index_idx_max);
 
     dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
     // Note: We're not encoding the dex pc if there is none. That's the case
@@ -908,7 +911,7 @@
     total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
   }
 
-  ALWAYS_INLINE FieldEncoding GetMethodIndexEncoding() const {
+  ALWAYS_INLINE FieldEncoding GetMethodIndexIdxEncoding() const {
     return FieldEncoding(kMethodIndexBitOffset, dex_pc_bit_offset_);
   }
   ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
@@ -975,16 +978,23 @@
     }
   }
 
-  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
-                                               uint32_t depth) const {
+  ALWAYS_INLINE uint32_t GetMethodIndexIdxAtDepth(const InlineInfoEncoding& encoding,
+                                                  uint32_t depth) const {
     DCHECK(!EncodesArtMethodAtDepth(encoding, depth));
-    return encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
+    return encoding.GetMethodIndexIdxEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  ALWAYS_INLINE void SetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
-                                           uint32_t depth,
-                                           uint32_t index) {
-    encoding.GetMethodIndexEncoding().Store(GetRegionAtDepth(encoding, depth), index);
+  ALWAYS_INLINE void SetMethodIndexIdxAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth,
+                                              uint32_t index) {
+    encoding.GetMethodIndexIdxEncoding().Store(GetRegionAtDepth(encoding, depth), index);
+  }
+
+  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                               const MethodInfo& method_info,
+                                               uint32_t depth) const {
+    return method_info.GetMethodIndex(GetMethodIndexIdxAtDepth(encoding, depth));
   }
 
   ALWAYS_INLINE uint32_t GetDexPcAtDepth(const InlineInfoEncoding& encoding,
@@ -1012,7 +1022,8 @@
   ALWAYS_INLINE ArtMethod* GetArtMethodAtDepth(const InlineInfoEncoding& encoding,
                                                uint32_t depth) const {
     uint32_t low_bits = encoding.GetExtraDataEncoding().Load(GetRegionAtDepth(encoding, depth));
-    uint32_t high_bits = encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
+    uint32_t high_bits = encoding.GetMethodIndexIdxEncoding().Load(
+        GetRegionAtDepth(encoding, depth));
     if (high_bits == 0) {
       return reinterpret_cast<ArtMethod*>(low_bits);
     } else {
@@ -1040,6 +1051,7 @@
 
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& info,
+            const MethodInfo& method_info,
             uint16_t* number_of_dex_registers) const;
 
  private:
@@ -1219,12 +1231,18 @@
     encoding.GetInvokeTypeEncoding().Store(region_, invoke_type);
   }
 
-  ALWAYS_INLINE uint32_t GetMethodIndex(const InvokeInfoEncoding& encoding) const {
+  ALWAYS_INLINE uint32_t GetMethodIndexIdx(const InvokeInfoEncoding& encoding) const {
     return encoding.GetMethodIndexEncoding().Load(region_);
   }
 
-  ALWAYS_INLINE void SetMethodIndex(const InvokeInfoEncoding& encoding, uint32_t method_index) {
-    encoding.GetMethodIndexEncoding().Store(region_, method_index);
+  ALWAYS_INLINE void SetMethodIndexIdx(const InvokeInfoEncoding& encoding,
+                                       uint32_t method_index_idx) {
+    encoding.GetMethodIndexEncoding().Store(region_, method_index_idx);
+  }
+
+  ALWAYS_INLINE uint32_t GetMethodIndex(const InvokeInfoEncoding& encoding,
+                                        MethodInfo method_info) const {
+    return method_info.GetMethodIndex(GetMethodIndexIdx(encoding));
   }
 
   bool IsValid() const { return region_.pointer() != nullptr; }
@@ -1542,7 +1560,8 @@
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             bool dump_stack_maps,
-            InstructionSet instruction_set) const;
+            InstructionSet instruction_set,
+            const MethodInfo& method_info) const;
 
   // Check that the code info has valid stack map and abort if it does not.
   void AssertValidStackMap(const CodeInfoEncoding& encoding) const {
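A fragment showing the two-step lookup the Idx accessors introduce; `encoding`, `depth`, `inline_info`, and `method_info` are assumed to be in scope. InlineInfo now stores an index into MethodInfo rather than the dex method index itself:

  uint32_t idx = inline_info.GetMethodIndexIdxAtDepth(encoding, depth);
  uint32_t method_index = method_info.GetMethodIndex(idx);
  // Matches the convenience accessor defined above:
  CHECK_EQ(method_index, inline_info.GetMethodIndexAtDepth(encoding, method_info, depth));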
diff --git a/runtime/utils.h b/runtime/utils.h
index 96e5bfa..24fd205 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -325,6 +325,18 @@
   return size;
 }
 
+// Return -1 if <, 0 if ==, 1 if >.
+template <typename T>
+inline static int32_t Compare(T lhs, T rhs) {
+  return (lhs < rhs) ? -1 : ((lhs == rhs) ? 0 : 1);
+}
+
+// Return -1 if < 0, 0 if == 0, 1 if > 0.
+template <typename T>
+inline static int32_t Signum(T opnd) {
+  return (opnd < 0) ? -1 : ((opnd == 0) ? 0 : 1);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
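Usage sketch for the two helpers above (values chosen for illustration):

  CHECK_EQ(Compare(3, 5), -1);
  CHECK_EQ(Compare(5, 5), 0);
  CHECK_EQ(Compare(7, 5), 1);
  CHECK_EQ(Signum(-7), -1);
  CHECK_EQ(Signum(0), 0);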
diff --git a/test/642-fp-callees/expected.txt b/test/642-fp-callees/expected.txt
new file mode 100644
index 0000000..77a1486
--- /dev/null
+++ b/test/642-fp-callees/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Done
diff --git a/test/642-fp-callees/fp_callees.cc b/test/642-fp-callees/fp_callees.cc
new file mode 100644
index 0000000..600f969
--- /dev/null
+++ b/test/642-fp-callees/fp_callees.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "jni.h"
+
+namespace art {
+
+// Make the array volatile, which apparently makes the compiler
+// use FP registers in the method below.
+volatile double array[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+
+extern "C" JNIEXPORT void JNICALL Java_Main_holdFpTemporaries(JNIEnv* env, jclass cls) {
+  jmethodID mid = env->GetStaticMethodID(cls, "caller", "(IIJ)V");
+  CHECK(mid != nullptr);
+  // Load values from the arrays, which will be loaded in callee-save FP registers.
+  double a = array[0];
+  double b = array[1];
+  double c = array[2];
+  double d = array[3];
+  double e = array[4];
+  double f = array[5];
+  double g = array[6];
+  double h = array[7];
+  double i = array[8];
+  double j = array[9];
+  double k = array[10];
+  double l = array[11];
+  env->CallStaticVoidMethod(cls, mid, 1, 1, 1L);
+  // Load each value into a non-volatile temporary, since bit_cast cannot bind to the volatile array element.
+  double temp = array[0];
+  CHECK_EQ(bit_cast<int64_t>(a), bit_cast<int64_t>(temp));
+  temp = array[1];
+  CHECK_EQ(bit_cast<int64_t>(b), bit_cast<int64_t>(temp));
+  temp = array[2];
+  CHECK_EQ(bit_cast<int64_t>(c), bit_cast<int64_t>(temp));
+  temp = array[3];
+  CHECK_EQ(bit_cast<int64_t>(d), bit_cast<int64_t>(temp));
+  temp = array[4];
+  CHECK_EQ(bit_cast<int64_t>(e), bit_cast<int64_t>(temp));
+  temp = array[5];
+  CHECK_EQ(bit_cast<int64_t>(f), bit_cast<int64_t>(temp));
+  temp = array[6];
+  CHECK_EQ(bit_cast<int64_t>(g), bit_cast<int64_t>(temp));
+  temp = array[7];
+  CHECK_EQ(bit_cast<int64_t>(h), bit_cast<int64_t>(temp));
+  temp = array[8];
+  CHECK_EQ(bit_cast<int64_t>(i), bit_cast<int64_t>(temp));
+  temp = array[9];
+  CHECK_EQ(bit_cast<int64_t>(j), bit_cast<int64_t>(temp));
+  temp = array[10];
+  CHECK_EQ(bit_cast<int64_t>(k), bit_cast<int64_t>(temp));
+  temp = array[11];
+  CHECK_EQ(bit_cast<int64_t>(l), bit_cast<int64_t>(temp));
+}
+
+}  // namespace art
diff --git a/test/642-fp-callees/info.txt b/test/642-fp-callees/info.txt
new file mode 100644
index 0000000..d3e4bda
--- /dev/null
+++ b/test/642-fp-callees/info.txt
@@ -0,0 +1,2 @@
+Regression test for vixl32 backend, which used to incorrectly
+use D14 as a temporary register.
diff --git a/test/642-fp-callees/src/Main.java b/test/642-fp-callees/src/Main.java
new file mode 100644
index 0000000..fa57c93
--- /dev/null
+++ b/test/642-fp-callees/src/Main.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    holdFpTemporaries();
+    System.out.println("Done");
+  }
+
+  public static void caller(int a, int b, long c) {
+    $noinline$callee(a, b, c);
+  }
+
+  // This method is "no inline", in order to generate the
+  // bad floating point use at the call site.
+  public static void $noinline$callee(int a, int b, long c) {
+  }
+
+  public native static void holdFpTemporaries();
+}
diff --git a/test/958-methodhandle-emulated-stackframe/build b/test/958-methodhandle-stackframe/build
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/build
rename to test/958-methodhandle-stackframe/build
diff --git a/test/958-methodhandle-emulated-stackframe/expected.txt b/test/958-methodhandle-stackframe/expected.txt
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/expected.txt
rename to test/958-methodhandle-stackframe/expected.txt
diff --git a/test/958-methodhandle-emulated-stackframe/info.txt b/test/958-methodhandle-stackframe/info.txt
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/info.txt
rename to test/958-methodhandle-stackframe/info.txt
diff --git a/test/958-methodhandle-emulated-stackframe/src/Main.java b/test/958-methodhandle-stackframe/src/Main.java
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/src/Main.java
rename to test/958-methodhandle-stackframe/src/Main.java
diff --git a/test/Android.bp b/test/Android.bp
index 3bb3ef8..4ebfd74 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -337,6 +337,7 @@
         "596-monitor-inflation/monitor_inflation.cc",
         "597-deopt-new-string/deopt.cc",
         "626-const-class-linking/clear_dex_cache_types.cc",
+        "642-fp-callees/fp_callees.cc",
     ],
     shared_libs: [
         "libbacktrace",
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 01eb14e..7cd73fe 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -381,7 +381,7 @@
   908-gc-start-finish \
   913-heaps \
   961-default-iface-resolution-gen \
-  964-default-iface-init-gen
+  964-default-iface-init-gen \
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -432,6 +432,9 @@
   138-duplicate-classes-check2 \
   147-stripped-dex-fallback \
   554-jit-profile-file \
+  616-cha \
+  616-cha-abstract \
+  912-classes \
   629-vdex-speed
 
 # This test fails without an image.
@@ -440,6 +443,12 @@
   137-cfi \
   138-duplicate-classes-check \
   018-stack-overflow \
+  476-clinit-inline-static-invoke \
+  496-checker-inlining-class-loader \
+  637-checker-throw-inline \
+  616-cha \
+  616-cha-abstract \
+  912-classes \
   961-default-iface-resolution-gen \
   964-default-iface-init \
   968-default-partial-compile-gen \
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 535b94f..f3783a0 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1,12 +1,12 @@
 [
     {
-        "test": "153-reference-stress",
+        "tests": "153-reference-stress",
         "description": ["Disable 153-reference-stress temporarily until a fix",
                         "arrives."],
         "bug": "http://b/33389022"
     },
     {
-        "test": "080-oom-fragmentation",
+        "tests": "080-oom-fragmentation",
         "description": "Disable 080-oom-fragmentation due to flakes.",
         "bug": "http://b/33795328"
     },
@@ -21,7 +21,7 @@
         "bug": "http://b/34193123"
     },
     {
-        "test": "149-suspend-all-stress",
+        "tests": "149-suspend-all-stress",
         "description": "Disable 149-suspend-all-stress, its output is flaky",
         "bug": "http://b/28988206"
     },
@@ -34,13 +34,13 @@
                         "loaded systems."]
     },
     {
-        "test": "147-stripped-dex-fallback",
+        "tests": "147-stripped-dex-fallback",
         "variant": "target",
         "description": ["147-stripped-dex-fallback isn't supported on device",
                         "because --strip-dex  requires the zip command."]
     },
     {
-        "test": "569-checker-pattern-replacement",
+        "tests": "569-checker-pattern-replacement",
         "variant": "target",
         "description": ["569-checker-pattern-replacement tests behaviour",
                         "present only on host."]
@@ -54,13 +54,7 @@
                         "doesn't (and isn't meant to) work with --prebuild."]
     },
     {
-        "test": "554-jit-profile-file",
-        "variant": "no-prebuild | interpreter",
-        "description": ["554-jit-profile-file is disabled because it needs a",
-                        "primary oat file to know what it should save."]
-    },
-    {
-        "tests": ["529-checker-unresolved", "555-checker-regression-x86const"],
+        "tests": ["529-checker-unresolved"],
         "variant": "no-prebuild",
         "bug": "http://b/27784033"
     },
@@ -73,27 +67,26 @@
     {
         "tests": ["117-nopatchoat",
                   "118-noimage-dex2oat",
-                  "119-noimage-patchoat",
-                  "554-jit-profile-file"],
+                  "119-noimage-patchoat"],
         "variant": "no-relocate",
         "description": ["117-nopatchoat is not broken per-se it just doesn't",
                         "work (and isn't meant to) without --prebuild",
                         "--relocate"]
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "interp-ac",
         "description": ["Temporarily disable some broken tests when forcing",
                         "access checks in interpreter"],
         "bug": "http://b/22414682"
     },
     {
-        "test" : "629-vdex-speed",
+        "tests" : "629-vdex-speed",
         "variant": "interp-ac | no-dex2oat | interpreter | jit | relocate-npatchoat",
         "description": "629 requires compilation."
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "gcstress",
         "description": ["137-cfi needs to unwind a second forked process. We're",
                         "using a primitive sleep to wait till we hope the",
@@ -101,7 +94,7 @@
                         "slowness of gcstress makes this bad."]
     },
     {
-        "test": "152-dead-large-object",
+        "tests": "152-dead-large-object",
         "variant": "gcstress",
         "description": ["152-dead-large-object requires a heap larger than what gcstress uses."],
         "bug": "http://b/35800768"
@@ -115,7 +108,7 @@
                         "non-deterministic. Same for 913."]
     },
     {
-        "test": "961-default-iface-resolution-gen",
+        "tests": "961-default-iface-resolution-gen",
         "variant": "gcstress",
         "description": ["961-default-iface-resolution-gen and",
                         "964-default-iface-init-genare very long tests that",
@@ -125,25 +118,25 @@
                         "lot."]
     },
     {
-        "test": "964-default-iface-init-gen",
+        "tests": "964-default-iface-init-gen",
         "variant": "gcstress"
     },
     {
-        "test": "154-gc-loop",
+        "tests": "154-gc-loop",
         "variant": "gcstress | jit & debug",
         "description": ["154-gc-loop depends GC not happening too often"],
         "bug": "http://b/35917229"
     },
     {
-        "test": "115-native-bridge",
+        "tests": "115-native-bridge",
         "variant": "target",
         "description": ["115-native-bridge setup is complicated. Need to",
                         "implement it correctly for the target."]
     },
     {
-        "test": "130-hprof",
+        "tests": "130-hprof",
         "variant": "target",
-        "desription": ["130-hprof dumps the heap and runs hprof-conv to check",
+        "description": ["130-hprof dumps the heap and runs hprof-conv to check",
                        "whether the file is somewhat readable. Thi is only",
                        "possible on the host. TODO: Turn off all the other",
                        "combinations, this is more about testing actual ART",
@@ -151,7 +144,7 @@
                        "complete test) JDWP must be set up."]
     },
     {
-        "test": "131-structural-change",
+        "tests": "131-structural-change",
         "variant": "debug",
         "description": ["131 is an old test. The functionality has been",
                         "implemented at an earlier stage and is checked",
@@ -160,25 +153,19 @@
                         "punt to interpreter"]
     },
     {
-        "test": "138-duplicate-classes-check",
+        "tests": "138-duplicate-classes-check",
         "variant": "ndebug",
         "description": ["Turned on for debug builds since debug builds have",
                         "duplicate classes checks enabled"],
         "bug": "http://b/2133391"
     },
     {
-        "test": "147-stripped-dex-fallback",
+        "tests": "147-stripped-dex-fallback",
         "variant": "no-dex2oat | no-image | relocate-npatchoat",
         "description": ["147-stripped-dex-fallback is disabled because it",
                         "requires --prebuild."]
     },
     {
-        "test": "554-jit-profile-file",
-        "variant": "no-dex2oat | no-image | relocate-npatchoat",
-        "description": ["554-jit-profile-file is disabled because it needs a",
-                        "primary oat file to know what it should save."]
-    },
-    {
         "tests": ["116-nodex2oat",
                   "117-nopatchoat",
                   "118-noimage-dex2oat",
@@ -197,14 +184,14 @@
                   "138-duplicate-classes-check",
                   "018-stack-overflow",
                   "961-default-iface-resolution-gen",
-                  "964-default-iface-init"],
+                  "964-default-iface-init-gen"],
         "variant": "no-image",
         "description": ["This test fails without an image. 018, 961, 964 often",
                         "time out."],
         "bug": "http://b/34369284"
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "description": ["This test unrolls and expects managed frames, but",
                         "tracing means we run the interpreter."],
         "variant": "trace | stream"
@@ -219,7 +206,7 @@
         "variant": "trace | stream"
     },
     {
-        "test": "130-hprof",
+        "tests": "130-hprof",
         "description": "130 occasional timeout",
         "bug": "http://b/32383962",
         "variant": "trace | stream"
@@ -240,14 +227,14 @@
                         "suppressed when tracing."]
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "description": ["CFI unwinding expects managed frames, and the test",
                         "does not iterate enough to even compile. JIT also",
                         "uses Generic JNI instead of the JNI compiler."],
         "variant": "interpreter | jit"
     },
     {
-        "test": "906-iterate-heap",
+        "tests": "906-iterate-heap",
         "description": ["Test 906 iterates the heap filtering with different",
                         "options. No instances should be created between those",
                         "runs to be able to have precise checks."],
@@ -275,22 +262,22 @@
         "variant": "optimizing &  ndebuggable | regalloc_gc & ndebuggable"
     },
     {
-        "test": "596-app-images",
+        "tests": "596-app-images",
         "variant": "npictest"
     },
     {
-        "test": "055-enum-performance",
+        "tests": "055-enum-performance",
         "variant": "optimizing | regalloc_gc",
         "description": ["055: Exceeds run time limits due to heap poisoning ",
                         "instrumentation (on ARM and ARM64 devices)."]
     },
     {
-        "test": "909-attach-agent",
+        "tests": "909-attach-agent",
         "variant": "debuggable",
         "description": "Tests that check semantics for a non-debuggable app."
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "debuggable",
         "description": ["The test relies on AOT code and debuggable makes us",
                         "JIT always."]
@@ -330,7 +317,7 @@
         "variant": "optimizing | regalloc_gc"
     },
     {
-        "test": "055-enum-performance",
+        "tests": "055-enum-performance",
         "description": ["The test tests performance which degrades during",
                         "bisecting."],
         "env_vars": {"ART_TEST_BISECTION": "true"},
@@ -341,5 +328,19 @@
                   "641-checker-arraycopy"],
         "env_vars": {"ART_USE_READ_BARRIER": "true"},
         "variant": "interpreter | optimizing | regalloc_gc | jit"
+    },
+    {
+        "tests": ["912-classes",
+                  "616-cha",
+                  "616-cha-abstract"],
+        "bug": "http://b/36344364 http://b36344221",
+        "variant": "no-dex2oat | relocate-npatchoat"
+    },
+    {
+        "tests": ["476-clinit-inline-static-invoke",
+                  "496-checker-inlining-class-loader",
+                  "637-checker-throw-inline"],
+        "bug": "http://b/36365552",
+        "variant": "no-image & jit"
     }
 ]
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index be84f89..a550409 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -547,6 +547,22 @@
   finally:
     print_mutex.release()
 
+def verify_knownfailure_entry(entry):
+  supported_field = {
+      'tests' : (list, unicode),
+      'description' : (list, unicode),
+      'bug' : (unicode,),
+      'variant' : (unicode,),
+      'env_vars' : (dict,),
+  }
+  for field in entry:
+    if field not in supported_field:
+      raise ValueError('%s is not a supported field\n%s' % (field, str(entry)))
+    field_type = type(entry[field])
+    if field_type not in supported_field[field]:
+      raise ValueError('%s is not a supported type for %s\n%s' % (
+          str(field_type),
+          field,
+          str(entry)))
+
 def get_disabled_test_info():
   """Generate set of known failures.
 
@@ -563,15 +579,18 @@
 
   disabled_test_info = {}
   for failure in known_failures_info:
-    tests = failure.get('test')
-    if tests:
+    verify_knownfailure_entry(failure)
+    tests = failure.get('tests', [])
+    if isinstance(tests, unicode):
       tests = [tests]
-    else:
-      tests = failure.get('tests', [])
     variants = parse_variants(failure.get('variant'))
     env_vars = failure.get('env_vars')
+
     if check_env_vars(env_vars):
       for test in tests:
+        if test not in RUN_TEST_SET:
+          raise ValueError('%s is not a valid run-test' % (
+              test))
         if test in disabled_test_info:
           disabled_test_info[test] = disabled_test_info[test].union(variants)
         else:
@@ -635,6 +654,9 @@
     variant = set()
     for and_variant in and_variants:
       and_variant = and_variant.strip()
+      if and_variant not in TOTAL_VARIANTS_SET:
+        raise ValueError('%s is not a valid variant' % (
+            and_variant))
       variant.add(and_variant)
     variant_list.add(frozenset(variant))
   return variant_list