Merge "Handle primitive types in VerifierTypes::AddAssignability."
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 1691dbb..d59d8f6 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -86,6 +86,7 @@
 
 ART_GTEST_atomic_method_ref_map_test_DEX_DEPS := Interfaces
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MethodTypes MultiDex MyClass Nested Statics StaticsFromCode
+ART_GTEST_class_table_test_DEX_DEPS := XandY
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
@@ -598,6 +599,7 @@
 ART_TEST_TARGET_VALGRIND_GTEST_RULES :=
 ART_GTEST_TARGET_ANDROID_ROOT :=
 ART_GTEST_class_linker_test_DEX_DEPS :=
+ART_GTEST_class_table_test_DEX_DEPS :=
 ART_GTEST_compiler_driver_test_DEX_DEPS :=
 ART_GTEST_dex_file_test_DEX_DEPS :=
 ART_GTEST_exception_test_DEX_DEPS :=
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index 156ca9e..e41d9bd 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -22,6 +22,8 @@
 #include "detail/cmdline_debug_detail.h"
 #include "cmdline_type_parser.h"
 
+#include "android-base/strings.h"
+
 // Includes for the types that are being specialized
 #include <string>
 #include "base/logging.h"
@@ -447,7 +449,7 @@
   }
 
   std::string Join() const {
-    return art::Join(list_, Separator);
+    return android::base::Join(list_, Separator);
   }
 
   static ParseStringList<Separator> Split(const std::string& str) {
@@ -709,43 +711,43 @@
     // The rest of these options are always the wildcard from '-Xps-*'
     std::string suffix = RemovePrefix(option);
 
-    if (StartsWith(option, "min-save-period-ms:")) {
+    if (android::base::StartsWith(option, "min-save-period-ms:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::min_save_period_ms_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "save-resolved-classes-delay-ms:")) {
+    if (android::base::StartsWith(option, "save-resolved-classes-delay-ms:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::save_resolved_classes_delay_ms_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "startup-method-samples:")) {
+    if (android::base::StartsWith(option, "startup-method-samples:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::startup_method_samples_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "min-methods-to-save:")) {
+    if (android::base::StartsWith(option, "min-methods-to-save:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::min_methods_to_save_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "min-classes-to-save:")) {
+    if (android::base::StartsWith(option, "min-classes-to-save:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::min_classes_to_save_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "min-notification-before-wake:")) {
+    if (android::base::StartsWith(option, "min-notification-before-wake:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::min_notification_before_wake_,
              type_parser.Parse(suffix));
     }
-    if (StartsWith(option, "max-notification-before-wake:")) {
+    if (android::base::StartsWith(option, "max-notification-before-wake:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
              &ProfileSaverOptions::max_notification_before_wake_,
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 14eac30..da03c21 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -25,6 +25,8 @@
 #include <numeric>
 #include <memory>
 
+#include "android-base/strings.h"
+
 #include "cmdline_parse_result.h"
 #include "cmdline_types.h"
 #include "token_range.h"
@@ -399,7 +401,7 @@
             allowed_values.push_back(name);
           }
 
-          std::string allowed_values_flat = Join(allowed_values, ',');
+          std::string allowed_values_flat = android::base::Join(allowed_values, ',');
           return CmdlineResult(CmdlineResult::kFailure,
                                "Argument value '" + argument + "' does not match any of known valid"
                                 "values: {" + allowed_values_flat + "}");
@@ -426,7 +428,7 @@
             allowed_values.push_back(arg_name);
           }
 
-          std::string allowed_values_flat = Join(allowed_values, ',');
+          std::string allowed_values_flat = android::base::Join(allowed_values, ',');
           return CmdlineResult(CmdlineResult::kFailure,
                                "Argument value '" + argument + "' does not match any of known valid"
                                 "values: {" + allowed_values_flat + "}");
diff --git a/cmdline/token_range.h b/cmdline/token_range.h
index 3358067..c22d6c8 100644
--- a/cmdline/token_range.h
+++ b/cmdline/token_range.h
@@ -23,6 +23,8 @@
 #include <algorithm>
 #include <memory>
 
+#include "android-base/strings.h"
+
 namespace art {
 // A range of tokens to make token matching algorithms easier.
 //
@@ -374,7 +376,7 @@
   // e.g. ["hello", "world"].join('$') == "hello$world"
   std::string Join(char separator) const {
     TokenList tmp(begin(), end());
-    return art::Join(tmp, separator);
+    return android::base::Join(tmp, separator);
     // TODO: Join should probably take an offset or iterators
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index a47e711..b22ca47 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -803,6 +803,13 @@
   result = result || PruneAppImageClassInternal(klass->GetSuperClass(),
                                                 &my_early_exit,
                                                 visited);
+  // Remove the class if its dex file is not in the set of dex files. This happens for classes that
+  // come from a uses-library when there is no profile. b/30688277
+  mirror::DexCache* dex_cache = klass->GetDexCache();
+  if (dex_cache != nullptr) {
+    result = result ||
+        dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end();
+  }
   // Erase the element we stored earlier since we are exiting the function.
   auto it = visited->find(klass);
   DCHECK(it != visited->end());
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index fa6a522..402eeee 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1402,6 +1402,14 @@
     entry.second = index;
     ++index;
   }
+  for (auto& entry : jit_class_roots_) {
+    // Update the `roots` with the class, and replace the address temporarily
+    // stored to the index in the table.
+    uint64_t address = entry.second;
+    roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr());
+    entry.second = index;
+    ++index;
+  }
   EmitJitRootPatches(code, roots_data);
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4b11e7c..2e2c3c0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -34,6 +34,7 @@
 #include "stack_map_stream.h"
 #include "string_reference.h"
 #include "utils/label.h"
+#include "utils/type_reference.h"
 
 namespace art {
 
@@ -343,7 +344,7 @@
   void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
   size_t GetNumberOfJitRoots() const {
-    return jit_string_roots_.size();
+    return jit_string_roots_.size() + jit_class_roots_.size();
   }
 
   // Fills the `literals` array with literals collected during code generation.
@@ -611,6 +612,8 @@
         block_order_(nullptr),
         jit_string_roots_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        jit_class_roots_(TypeReferenceValueComparator(),
+                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         disasm_info_(nullptr),
         stats_(stats),
         graph_(graph),
@@ -681,6 +684,7 @@
   virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED,
                                   const uint8_t* roots_data ATTRIBUTE_UNUSED) {
     DCHECK_EQ(jit_string_roots_.size(), 0u);
+    DCHECK_EQ(jit_class_roots_.size(), 0u);
   }
 
   // Frame size required for this method.
@@ -711,7 +715,12 @@
   // Maps a StringReference (dex_file, string_index) to the index in the literal table.
   // Entries are intially added with a 0 index, and `EmitJitRoots` will compute all the
   // indices.
-  ArenaSafeMap<StringReference, size_t, StringReferenceValueComparator> jit_string_roots_;
+  ArenaSafeMap<StringReference, uint32_t, StringReferenceValueComparator> jit_string_roots_;
+
+  // Maps a TypeReference (dex_file, type_index) to the index in the literal table.
+  // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots`
+  // will compute all the indices.
+  ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_;
 
   DisassemblyInformation* disasm_info_;
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ed6eef1..8104613 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1216,7 +1216,9 @@
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
-                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_class_patches_(TypeReferenceValueComparator(),
+                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -5712,8 +5714,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
+    case HLoadClass::LoadKind::kJitTableAddress:
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative:
       DCHECK(!Runtime::Current()->UseJitCompilation());
@@ -5814,22 +5815,12 @@
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(cls->GetAddress(), 0u);
-      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
-      // 16-bit LDR immediate has a 5-bit offset multiplied by the size and that gives
-      // a 128B range. To try and reduce the number of literals if we load multiple types,
-      // simply split the dex cache address to a 128B aligned base loaded from a literal
-      // and the remaining offset embedded in the load.
-      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
-      DCHECK_ALIGNED(cls->GetAddress(), 4u);
-      constexpr size_t offset_bits = /* encoded bits */ 5 + /* scale */ 2;
-      uint32_t base_address = address & ~MaxInt<uint32_t>(offset_bits);
-      uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
-      __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
-      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
+    case HLoadClass::LoadKind::kJitTableAddress: {
+      __ LoadLiteral(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
+                                                               cls->GetTypeIndex(),
+                                                               cls->GetAddress()));
+      // /* GcRoot<mirror::Class> */ out = *out
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
@@ -7379,10 +7370,6 @@
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) {
-  return DeduplicateUint32Literal(address, &uint32_literals_);
-}
-
 Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                        dex::StringIndex string_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
@@ -7391,6 +7378,15 @@
       [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
 }
 
+Literal* CodeGeneratorARM::DeduplicateJitClassLiteral(const DexFile& dex_file,
+                                                      dex::TypeIndex type_index,
+                                                      uint64_t address) {
+  jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), address);
+  return jit_class_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
 inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches(
     const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -7707,18 +7703,28 @@
   }
 }
 
+static void PatchJitRootUse(uint8_t* code,
+                            const uint8_t* roots_data,
+                            Literal* literal,
+                            uint64_t index_in_table) {
+  DCHECK(literal->GetLabel()->IsBound());
+  uint32_t literal_offset = literal->GetLabel()->Position();
+  uintptr_t address =
+      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+  uint8_t* data = code + literal_offset;
+  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
 void CodeGeneratorARM::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
     const auto& it = jit_string_roots_.find(entry.first);
     DCHECK(it != jit_string_roots_.end());
-    size_t index_in_table = it->second;
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = literal->GetLabel()->Position();
-    uintptr_t address =
-        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
-    uint8_t* data = code + literal_offset;
-    reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+    PatchJitRootUse(code, roots_data, entry.second, it->second);
+  }
+  for (const auto& entry : jit_class_patches_) {
+    const auto& it = jit_class_roots_.find(entry.first);
+    DCHECK(it != jit_class_roots_.end());
+    PatchJitRootUse(code, roots_data, entry.second, it->second);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8230512..605169d 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -489,8 +489,10 @@
                                              dex::StringIndex string_index);
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
-  Literal* DeduplicateDexCacheAddressLiteral(uint32_t address);
   Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index);
+  Literal* DeduplicateJitClassLiteral(const DexFile& dex_file,
+                                      dex::TypeIndex type_index,
+                                      uint64_t address);
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
@@ -599,9 +601,9 @@
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           Literal*,
                                           StringReferenceValueComparator>;
-  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
-                                            Literal*,
-                                            TypeReferenceValueComparator>;
+  using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                        Literal*,
+                                        TypeReferenceValueComparator>;
 
   Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
@@ -638,7 +640,7 @@
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  BootTypeToLiteralMap boot_image_type_patches_;
+  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
@@ -646,6 +648,8 @@
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
+  // Patches for class literals in JIT compiled code.
+  TypeToLiteralMap jit_class_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 6eebd69..5cff303 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1162,7 +1162,9 @@
       boot_image_address_patches_(std::less<uint32_t>(),
                                   graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
-                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_class_patches_(TypeReferenceValueComparator(),
+                         graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -4169,11 +4171,6 @@
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddressLiteral(
-    uint64_t address) {
-  return DeduplicateUint64Literal(address);
-}
-
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
     const DexFile& dex_file, dex::StringIndex string_index) {
   jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u);
@@ -4182,6 +4179,14 @@
       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
 }
 
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
+    const DexFile& dex_file, dex::TypeIndex type_index, uint64_t address) {
+  jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), address);
+  return jit_class_patches_.GetOrCreate(
+      TypeReference(&dex_file, type_index),
+      [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+}
+
 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
                                              vixl::aarch64::Register reg) {
   DCHECK(reg.IsX());
@@ -4359,7 +4364,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kDexCacheAddress:
+    case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative:
@@ -4452,26 +4457,16 @@
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(cls->GetAddress(), 0u);
-      // LDR immediate has a 12-bit offset multiplied by the size and for 32-bit loads
-      // that gives a 16KiB range. To try and reduce the number of literals if we load
-      // multiple types, simply split the dex cache address to a 16KiB aligned base
-      // loaded from a literal and the remaining offset embedded in the load.
-      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
-      DCHECK_ALIGNED(cls->GetAddress(), 4u);
-      constexpr size_t offset_bits = /* encoded bits */ 12 + /* scale */ 2;
-      uint64_t base_address = cls->GetAddress() & ~MaxInt<uint64_t>(offset_bits);
-      uint32_t offset = cls->GetAddress() & MaxInt<uint64_t>(offset_bits);
-      __ Ldr(out.X(), codegen_->DeduplicateDexCacheAddressLiteral(base_address));
-      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
+    case HLoadClass::LoadKind::kJitTableAddress: {
+      __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
+                                                       cls->GetTypeIndex(),
+                                                       cls->GetAddress()));
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               out.X(),
-                              offset,
+                              /* offset */ 0,
                               /* fixup_label */ nullptr,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
+                              kCompilerReadBarrierOption);
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
@@ -5782,17 +5777,27 @@
   }
 }
 
+static void PatchJitRootUse(uint8_t* code,
+                            const uint8_t* roots_data,
+                            vixl::aarch64::Literal<uint32_t>* literal,
+                            uint64_t index_in_table) {
+  uint32_t literal_offset = literal->GetOffset();
+  uintptr_t address =
+      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+  uint8_t* data = code + literal_offset;
+  reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const auto& entry : jit_string_patches_) {
     const auto& it = jit_string_roots_.find(entry.first);
     DCHECK(it != jit_string_roots_.end());
-    size_t index_in_table = it->second;
-    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
-    uint32_t literal_offset = literal->GetOffset();
-    uintptr_t address =
-        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
-    uint8_t* data = code + literal_offset;
-    reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
+    PatchJitRootUse(code, roots_data, entry.second, it->second);
+  }
+  for (const auto& entry : jit_class_patches_) {
+    const auto& it = jit_class_roots_.find(entry.first);
+    DCHECK(it != jit_class_roots_.end());
+    PatchJitRootUse(code, roots_data, entry.second, it->second);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 868c8b0..85b6f9f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -566,9 +566,11 @@
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
                                                                     dex::TypeIndex type_index);
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
-  vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                                 dex::StringIndex string_index);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file,
+                                                               dex::TypeIndex type_index,
+                                                               uint64_t address);
 
   void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg);
   void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -682,9 +684,9 @@
   using StringToLiteralMap = ArenaSafeMap<StringReference,
                                           vixl::aarch64::Literal<uint32_t>*,
                                           StringReferenceValueComparator>;
-  using BootTypeToLiteralMap = ArenaSafeMap<TypeReference,
-                                            vixl::aarch64::Literal<uint32_t>*,
-                                            TypeReferenceValueComparator>;
+  using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+                                        vixl::aarch64::Literal<uint32_t>*,
+                                        TypeReferenceValueComparator>;
 
   vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
                                                              Uint32ToLiteralMap* map);
@@ -733,8 +735,7 @@
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // Deduplication map for 64-bit literals, used for non-patchable method address, method code
-  // or string dex cache address.
+  // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
   Uint64ToLiteralMap uint64_literals_;
   // Method patch info, map MethodReference to a literal for method address and method code.
   MethodToLiteralMap method_patches_;
@@ -749,7 +750,7 @@
   // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Deduplication map for boot type literals for kBootImageLinkTimeAddress.
-  BootTypeToLiteralMap boot_image_type_patches_;
+  TypeToLiteralMap boot_image_type_patches_;
   // PC-relative type patch info.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // Deduplication map for patchable boot image addresses.
@@ -757,6 +758,8 @@
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
+  // Patches for class literals in JIT compiled code.
+  TypeToLiteralMap jit_class_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 3a3d2a9..2c6df38 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -5776,7 +5776,7 @@
     case HLoadClass::LoadKind::kBootImageAddress:
       // TODO(VIXL): Enable it back when literal pools are fixed in VIXL.
       return HLoadClass::LoadKind::kDexCacheViaMethod;
-    case HLoadClass::LoadKind::kDexCacheAddress:
+    case HLoadClass::LoadKind::kJitTableAddress:
       // TODO(VIXL): Enable it back when literal pools are fixed in VIXL.
       return HLoadClass::LoadKind::kDexCacheViaMethod;
     case HLoadClass::LoadKind::kDexCachePcRelative:
@@ -5868,7 +5868,7 @@
       TODO_VIXL32(FATAL);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
+    case HLoadClass::LoadKind::kJitTableAddress: {
       TODO_VIXL32(FATAL);
       break;
     }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ff48f66..cae4161 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -5251,9 +5251,9 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kDexCacheAddress:
+    case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
-      fallback_load = false;
+      fallback_load = true;
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative:
       DCHECK(!Runtime::Current()->UseJitCompilation());
@@ -5614,17 +5614,8 @@
                      codegen_->DeduplicateBootImageAddressLiteral(address));
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(cls->GetAddress(), 0u);
-      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
-      static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes.");
-      DCHECK_ALIGNED(cls->GetAddress(), 4u);
-      int16_t offset = Low16Bits(address);
-      uint32_t base_address = address - offset;  // This accounts for offset sign extension.
-      __ Lui(out, High16Bits(base_address));
-      // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset);
-      generate_null_check = !cls->IsInDexCache();
+    case HLoadClass::LoadKind::kJitTableAddress: {
+      LOG(FATAL) << "Unimplemented";
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 7efed8c..a6b41c0 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -18,6 +18,8 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
 
 #include <cstdint>
+#include <cstdlib>
+#include <limits>
 
 namespace art {
 
@@ -32,6 +34,12 @@
 // that it has been previously visited by the InstructionCodeGenerator.
 bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input);
 
+template <typename T> T AbsOrMin(T value) {
+  return (value == std::numeric_limits<T>::min())
+      ? value
+      : std::abs(value);
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d6e92cc..8612a67 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1013,6 +1013,7 @@
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       method_address_offset_(-1) {
@@ -6034,7 +6035,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kDexCacheAddress:
+    case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -6073,6 +6074,16 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
+Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file,
+                                              dex::TypeIndex dex_index,
+                                              uint64_t address) {
+  jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index), address);
+  // With `address` recorded for this type above, add a patch entry and return its label.
+  jit_class_patches_.emplace_back(dex_file, dex_index.index_);
+  PatchInfo<Label>* info = &jit_class_patches_.back();
+  return &info->label;  // Points into the entry just appended to jit_class_patches_.
+}
+
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
@@ -6124,16 +6135,12 @@
       codegen_->RecordSimplePatch();
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(cls->GetAddress(), 0u);
-      uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
+    case HLoadClass::LoadKind::kJitTableAddress: {
+      Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
+      Label* fixup_label = codegen_->NewJitRootClassPatch(
+          cls->GetDexFile(), cls->GetTypeIndex(), cls->GetAddress());
       // /* GcRoot<mirror::Class> */ out = *address
-      GenerateGcRootFieldLoad(cls,
-                              out_loc,
-                              Address::Absolute(address),
-                              /* fixup_label */ nullptr,
-                              read_barrier_option);
-      generate_null_check = !cls->IsInDexCache();
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
@@ -7770,18 +7777,31 @@
   }
 }
 
+void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,  // Patches one root use in `code`.
+                                       const uint8_t* roots_data,
+                                       const PatchInfo<Label>& info,
+                                       uint64_t index_in_table) const {
+  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+  uintptr_t address =  // Address of slot `index_in_table` in the table starting at roots_data.
+      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;  // 1-byte-aligned store.
+  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
+     dchecked_integral_cast<uint32_t>(address);  // Overwrites 32 bits of `code` at code_offset.
+}
+
 void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
-                                                            dex::StringIndex(info.index)));
+    const auto& it = jit_string_roots_.find(
+        StringReference(&info.dex_file, dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
-    size_t index_in_table = it->second;
-    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    uintptr_t address =
-        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
-    typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
-    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
-       dchecked_integral_cast<uint32_t>(address);
+    PatchJitRootUse(code, roots_data, info, it->second);  // it->second: index in the table.
+  }
+  // Class roots. NOTE(review): map values were inserted as addresses; presumably indices now.
+  for (const PatchInfo<Label>& info : jit_class_patches_) {
+    const auto& it = jit_class_roots_.find(
+        TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
+    DCHECK(it != jit_class_roots_.end());
+    PatchJitRootUse(code, roots_data, info, it->second);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 2ae3670..c44da97 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -416,12 +416,17 @@
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
+  Label* NewJitRootClassPatch(const DexFile& dex_file, dex::TypeIndex dex_index, uint64_t address);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   // Emit linker patches.
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void PatchJitRootUse(uint8_t* code,
+                       const uint8_t* roots_data,
+                       const PatchInfo<Label>& info,
+                       uint64_t index_in_table) const;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
   // Emit a write barrier.
@@ -623,6 +628,9 @@
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
 
+  // Patches for class root accesses in JIT compiled code.
+  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
+
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
   int32_t constant_area_start_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4474dec..7dfc736 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1260,7 +1260,8 @@
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -5460,8 +5461,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
-    case HLoadClass::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
+    case HLoadClass::LoadKind::kJitTableAddress:
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative:
       DCHECK(!Runtime::Current()->UseJitCompilation());
@@ -5500,6 +5500,16 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
+Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
+                                                 dex::TypeIndex dex_index,
+                                                 uint64_t address) {
+  jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index), address);
+  // With `address` recorded for this type above, add a patch entry and return its label.
+  jit_class_patches_.emplace_back(dex_file, dex_index.index_);
+  PatchInfo<Label>* info = &jit_class_patches_.back();
+  return &info->label;  // Points into the entry just appended to jit_class_patches_.
+}
+
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
   if (cls->NeedsAccessCheck()) {
@@ -5543,26 +5553,13 @@
       codegen_->RecordSimplePatch();
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheAddress: {
-      DCHECK_NE(cls->GetAddress(), 0u);
+    case HLoadClass::LoadKind::kJitTableAddress: {
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ true);
+      Label* fixup_label =
+          codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetAddress());
       // /* GcRoot<mirror::Class> */ out = *address
-      if (IsUint<32>(cls->GetAddress())) {
-        Address address = Address::Absolute(cls->GetAddress(), /* no_rip */ true);
-        GenerateGcRootFieldLoad(cls,
-                                out_loc,
-                                address,
-                                /* fixup_label */ nullptr,
-                                read_barrier_option);
-      } else {
-        // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
-        __ movq(out, Immediate(cls->GetAddress()));
-        GenerateGcRootFieldLoad(cls,
-                                out_loc,
-                                Address(out, 0),
-                                /* fixup_label */ nullptr,
-                                read_barrier_option);
-      }
-      generate_null_check = !cls->IsInDexCache();
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       break;
     }
     case HLoadClass::LoadKind::kDexCachePcRelative: {
@@ -7127,18 +7124,31 @@
   }
 }
 
+void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,  // Patches one root use in `code`.
+                                          const uint8_t* roots_data,
+                                          const PatchInfo<Label>& info,
+                                          uint64_t index_in_table) const {
+  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+  uintptr_t address =  // Address of slot `index_in_table` in the table starting at roots_data.
+      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+  typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;  // 1-byte-aligned store.
+  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
+     dchecked_integral_cast<uint32_t>(address);  // Overwrites 32 bits of `code` at code_offset.
+}
+
 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
   for (const PatchInfo<Label>& info : jit_string_patches_) {
-    const auto& it = jit_string_roots_.find(StringReference(&info.dex_file,
-                                                            dex::StringIndex(info.index)));
+    const auto& it = jit_string_roots_.find(
+        StringReference(&info.dex_file, dex::StringIndex(info.index)));
     DCHECK(it != jit_string_roots_.end());
-    size_t index_in_table = it->second;
-    uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    uintptr_t address =
-        reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
-    typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
-    reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
-       dchecked_integral_cast<uint32_t>(address);
+    PatchJitRootUse(code, roots_data, info, it->second);  // it->second: index in the table.
+  }
+  // Class roots. NOTE(review): map values were inserted as addresses; presumably indices now.
+  for (const PatchInfo<Label>& info : jit_class_patches_) {
+    const auto& it = jit_class_roots_.find(
+        TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
+    DCHECK(it != jit_class_roots_.end());
+    PatchJitRootUse(code, roots_data, info, it->second);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 2f41f73..391a23b 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -413,11 +413,17 @@
   Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index);
+  Label* NewJitRootClassPatch(const DexFile& dex_file, dex::TypeIndex dex_index, uint64_t address);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  void PatchJitRootUse(uint8_t* code,
+                       const uint8_t* roots_data,
+                       const PatchInfo<Label>& info,
+                       uint64_t index_in_table) const;
+
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
   const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
@@ -608,6 +614,9 @@
   // Patches for string literals in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
 
+  // Patches for class literals in JIT compiled code.
+  ArenaDeque<PatchInfo<Label>> jit_class_patches_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8d93867..fe4662a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1444,7 +1444,7 @@
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
   HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
-  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
+  HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, stats_);
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 658b804..c615df1 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -1185,6 +1185,18 @@
   RecordSimplification();
 }
 
+// Return whether x / divisor == x * (1.0f / divisor), for every float x.
+static constexpr bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
+  // True, if the 23 least significant bits of divisor (presumably raw float bits) are 0.
+  return ((divisor & 0x7fffff) == 0);
+}
+
+// Return whether x / divisor == x * (1.0 / divisor), for every double x.
+static constexpr bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
+  // True, if the 52 least significant bits of divisor (presumably raw double bits) are 0.
+  return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
+}
+
 void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 594255c..925d4f1 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2487,8 +2487,8 @@
       return os << "BootImageLinkTimePcRelative";
     case HLoadClass::LoadKind::kBootImageAddress:
       return os << "BootImageAddress";
-    case HLoadClass::LoadKind::kDexCacheAddress:
-      return os << "DexCacheAddress";
+    case HLoadClass::LoadKind::kJitTableAddress:
+      return os << "JitTableAddress";
     case HLoadClass::LoadKind::kDexCachePcRelative:
       return os << "DexCachePcRelative";
     case HLoadClass::LoadKind::kDexCacheViaMethod:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e3f4d8f..4a8cfcb 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -125,6 +125,11 @@
   kAnalysisSuccess,
 };
 
+template <typename T>  // Converts x to the unsigned type that std::make_unsigned gives for T.
+static inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
+  return static_cast<typename std::make_unsigned<T>::type>(x);
+}
+
 class HInstructionList : public ValueObject {
  public:
   HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {}
@@ -5493,9 +5498,8 @@
     // GetIncludePatchInformation().
     kBootImageAddress,
 
-    // Load from the resolved types array at an absolute address.
-    // Used for classes outside the boot image referenced by JIT-compiled code.
-    kDexCacheAddress,
+    // Load from the root table associated with the JIT compiled method.
+    kJitTableAddress,
 
     // Load from resolved types array in the dex cache using a PC-relative load.
     // Used for classes outside boot image when we know that we can access
@@ -5588,7 +5592,6 @@
            NeedsAccessCheck();
   }
 
-
   bool CanThrow() const OVERRIDE {
     return CanCallRuntime();
   }
@@ -5613,7 +5616,9 @@
     return load_data_.address;
   }
 
-  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { return !IsReferrersClass(); }
+  bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+    return !IsReferrersClass();
+  }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
     return SideEffects::CanTriggerGC();
@@ -5672,7 +5677,8 @@
   }
 
   static bool HasAddress(LoadKind load_kind) {
-    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+    return load_kind == LoadKind::kBootImageAddress ||
+        load_kind == LoadKind::kJitTableAddress;
   }
 
   static bool HasDexCacheReference(LoadKind load_kind) {
@@ -5691,7 +5697,7 @@
 
   union {
     uint32_t dex_cache_element_index;   // Only for dex cache reference.
-    uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
+    uint64_t address;  // Up to 64-bit, needed for kJitTableAddress on 64-bit targets.
   } load_data_;
 
   ReferenceTypeInfo loaded_class_rti_;
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 013e110..0e02311 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -24,12 +24,22 @@
 #include "optimizing/code_generator.h"
 #include "optimizing/optimizing_unit_test.h"
 #include "utils/assembler.h"
+#ifdef ART_USE_VIXL_ARM_BACKEND
+#include "utils/arm/assembler_arm_vixl.h"
+#else
 #include "utils/arm/assembler_thumb2.h"
+#endif
 #include "utils/mips/assembler_mips.h"
 #include "utils/mips64/assembler_mips64.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
 
+#ifdef ART_USE_VIXL_ARM_BACKEND
+namespace vixl32 = vixl::aarch32;
+
+using vixl32::r0;
+#endif
+
 namespace art {
 
 // Run the tests only on host.
@@ -158,8 +168,7 @@
     TestImpl(isa, #isa, expected_asm, expected_cfi);          \
   }
 
-// TODO(VIXL): Support this test for the VIXL backend.
-#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_ISA(kThumb2)
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
@@ -178,8 +187,7 @@
 TEST_ISA(kMips64)
 #endif
 
-// TODO(VIXL): Support this test for the VIXL backend.
-#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
+#ifdef ART_ENABLE_CODEGEN_arm
 TEST_F(OptimizingCFITest, kThumb2Adjust) {
   std::vector<uint8_t> expected_asm(
       expected_asm_kThumb2_adjust,
@@ -188,6 +196,16 @@
       expected_cfi_kThumb2_adjust,
       expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust));
   SetUpFrame(kThumb2);
+#ifdef ART_USE_VIXL_ARM_BACKEND
+#define __ down_cast<arm::ArmVIXLAssembler*>(GetCodeGenerator() \
+    ->GetAssembler())->GetVIXLAssembler()->
+  vixl32::Label target;
+  __ CompareAndBranchIfZero(r0, &target);
+  // Push the target out of range of CBZ.
+  for (size_t i = 0; i != 65; ++i) {
+    __ Ldr(r0, vixl32::MemOperand(r0));
+  }
+#else
 #define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
   Label target;
   __ CompareAndBranchIfZero(arm::R0, &target);
@@ -195,6 +213,7 @@
   for (size_t i = 0; i != 65; ++i) {
     __ ldr(arm::R0, arm::Address(arm::R0));
   }
+#endif
   __ Bind(&target);
 #undef __
   Finish();
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index f735dc8..82670c3 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -223,8 +223,16 @@
 // 0x00000040: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
+#ifdef ART_USE_VIXL_ARM_BACKEND
+    // VIXL emits an extra 2 bytes here for a 32-bit beq as there is no
+    // optimistic 16-bit emit and subsequent fixup for out of reach targets
+    // as with the current assembler.
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28, 0x00, 0xF0,
+    0x41, 0x80, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+#else
     0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28,
     0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+#endif
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
@@ -239,7 +247,11 @@
 };
 static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
     0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+#ifdef ART_USE_VIXL_ARM_BACKEND
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+#else
     0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A,
+#endif
     0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
     0x0E, 0x40,
 };
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8ea2b06..ba7012a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,6 +22,8 @@
 
 #include <stdint.h>
 
+#include "android-base/strings.h"
+
 #ifdef ART_ENABLE_CODEGEN_arm
 #include "dex_cache_array_fixups_arm.h"
 #endif
@@ -375,7 +377,8 @@
                             const DexFile& dex_file,
                             Handle<mirror::DexCache> dex_cache,
                             ArtMethod* method,
-                            bool osr) const;
+                            bool osr,
+                            VariableSizedHandleScope* handles) const;
 
   void MaybeRunInliner(HGraph* graph,
                        CodeGenerator* codegen,
@@ -495,7 +498,7 @@
                                 number_of_dex_registers,
                                 /* depth */ 0);
   } else if (opt_name == HSharpening::kSharpeningPassName) {
-    return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+    return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
   } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
     return new (arena) HSelectGenerator(graph, stats);
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
@@ -767,7 +770,8 @@
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
-  HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
+  HSharpening* sharpening = new (arena) HSharpening(
+      graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
@@ -866,7 +870,8 @@
                                               const DexFile& dex_file,
                                               Handle<mirror::DexCache> dex_cache,
                                               ArtMethod* method,
-                                              bool osr) const {
+                                              bool osr,
+                                              VariableSizedHandleScope* handles) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -976,64 +981,56 @@
                              compiler_driver,
                              dump_mutex_);
 
-  VLOG(compiler) << "Building " << pass_observer.GetMethodName();
-
   {
-    ScopedObjectAccess soa(Thread::Current());
-    VariableSizedHandleScope handles(soa.Self());
-    // Do not hold `mutator_lock_` between optimizations.
-    ScopedThreadSuspension sts(soa.Self(), kNative);
-
-    {
-      PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
-      HGraphBuilder builder(graph,
-                            &dex_compilation_unit,
-                            &dex_compilation_unit,
-                            &dex_file,
-                            *code_item,
-                            compiler_driver,
-                            compilation_stats_.get(),
-                            interpreter_metadata,
-                            dex_cache,
-                            &handles);
-      GraphAnalysisResult result = builder.BuildGraph();
-      if (result != kAnalysisSuccess) {
-        switch (result) {
-          case kAnalysisSkipped:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
-            break;
-          case kAnalysisInvalidBytecode:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
-            break;
-          case kAnalysisFailThrowCatchLoop:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
-            break;
-          case kAnalysisFailAmbiguousArrayOp:
-            MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
-            break;
-          case kAnalysisSuccess:
-            UNREACHABLE();
-        }
-        pass_observer.SetGraphInBadState();
-        return nullptr;
+    VLOG(compiler) << "Building " << pass_observer.GetMethodName();
+    PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
+    HGraphBuilder builder(graph,
+                          &dex_compilation_unit,
+                          &dex_compilation_unit,
+                          &dex_file,
+                          *code_item,
+                          compiler_driver,
+                          compilation_stats_.get(),
+                          interpreter_metadata,
+                          dex_cache,
+                          handles);
+    GraphAnalysisResult result = builder.BuildGraph();
+    if (result != kAnalysisSuccess) {
+      switch (result) {
+        case kAnalysisSkipped:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped);
+          break;
+        case kAnalysisInvalidBytecode:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode);
+          break;
+        case kAnalysisFailThrowCatchLoop:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
+          break;
+        case kAnalysisFailAmbiguousArrayOp:
+          MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+          break;
+        case kAnalysisSuccess:
+          UNREACHABLE();
       }
+      pass_observer.SetGraphInBadState();
+      return nullptr;
     }
-
-    RunOptimizations(graph,
-                     codegen.get(),
-                     compiler_driver,
-                     dex_compilation_unit,
-                     &pass_observer,
-                     &handles);
-
-    RegisterAllocator::Strategy regalloc_strategy =
-      compiler_options.GetRegisterAllocationStrategy();
-    AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
-
-    codegen->Compile(code_allocator);
-    pass_observer.DumpDisassembly();
   }
 
+  RunOptimizations(graph,
+                   codegen.get(),
+                   compiler_driver,
+                   dex_compilation_unit,
+                   &pass_observer,
+                   handles);
+
+  RegisterAllocator::Strategy regalloc_strategy =
+    compiler_options.GetRegisterAllocationStrategy();
+  AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy);
+
+  codegen->Compile(code_allocator);
+  pass_observer.DumpDisassembly();
+
   return codegen.release();
 }
 
@@ -1055,19 +1052,27 @@
             verified_method->GetEncounteredVerificationFailures())) {
     ArenaAllocator arena(Runtime::Current()->GetArenaPool());
     CodeVectorAllocator code_allocator(&arena);
-    std::unique_ptr<CodeGenerator> codegen(
-        TryCompile(&arena,
-                   &code_allocator,
-                   code_item,
-                   access_flags,
-                   invoke_type,
-                   class_def_idx,
-                   method_idx,
-                   jclass_loader,
-                   dex_file,
-                   dex_cache,
-                   nullptr,
-                   /* osr */ false));
+    std::unique_ptr<CodeGenerator> codegen;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      VariableSizedHandleScope handles(soa.Self());
+      // Go to native so that we don't block GC during compilation.
+      ScopedThreadSuspension sts(soa.Self(), kNative);
+      codegen.reset(
+          TryCompile(&arena,
+                     &code_allocator,
+                     code_item,
+                     access_flags,
+                     invoke_type,
+                     class_def_idx,
+                     method_idx,
+                     jclass_loader,
+                     dex_file,
+                     dex_cache,
+                     nullptr,
+                     /* osr */ false,
+                     &handles));
+    }
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
       method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
@@ -1112,7 +1117,8 @@
 bool IsCompilingWithCoreImage() {
   const std::string& image = Runtime::Current()->GetImageLocation();
   // TODO: This is under-approximating...
-  if (EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art")) {
+  if (android::base::EndsWith(image, "core.art") ||
+      android::base::EndsWith(image, "core-optimizing.art")) {
     return true;
   }
   return false;
@@ -1138,6 +1144,8 @@
 
   ArenaAllocator arena(Runtime::Current()->GetJitArenaPool());
   CodeVectorAllocator code_allocator(&arena);
+  VariableSizedHandleScope handles(self);
+
   std::unique_ptr<CodeGenerator> codegen;
   {
     // Go to native so that we don't block GC during compilation.
@@ -1154,7 +1162,8 @@
                    *dex_file,
                    dex_cache,
                    method,
-                   osr));
+                   osr,
+                   &handles));
     if (codegen.get() == nullptr) {
       return false;
     }
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index daf160a..bbbb1a1 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -151,7 +151,7 @@
 
   bool is_in_dex_cache = false;
   bool is_in_boot_image = false;
-  HLoadClass::LoadKind desired_load_kind;
+  HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1);
   uint64_t address = 0u;  // Class or dex cache element address.
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -190,18 +190,19 @@
           // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
           desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
           address = reinterpret_cast64<uint64_t>(klass);
+        } else if (is_in_dex_cache) {
+          desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
+          // We store in the address field the location of the stack reference maintained
+          // by the handle. We do this now so that the code generation does not need to figure
+          // out which class loader to use.
+          address = reinterpret_cast<uint64_t>(handles_->NewHandle(klass).GetReference());
         } else {
-          // Note: If the class is not in the dex cache or isn't initialized, the
-          // instruction needs environment and will not be inlined across dex files.
-          // Within a dex file, the slow-path helper loads the correct class and
-          // inlined frames are used correctly for OOM stack trace.
-          // TODO: Write a test for this. Bug: 29416588
-          desired_load_kind = HLoadClass::LoadKind::kDexCacheAddress;
-          void* dex_cache_element_address = &dex_cache->GetResolvedTypes()[type_index.index_];
-          address = reinterpret_cast64<uint64_t>(dex_cache_element_address);
+          // Class not loaded yet. Fall back to the dex cache.
+          // TODO(ngeoffray): Generate HDeoptimize instead.
+          desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
         }
-        // AOT app compilation. Check if the class is in the boot image.
       } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) {
+        // AOT app compilation. Check if the class is in the boot image.
         desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(klass);
       } else {
@@ -215,6 +216,7 @@
       }
     }
   }
+  DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1));
 
   if (is_in_boot_image) {
     load_class->MarkInBootImage();
@@ -245,7 +247,7 @@
       load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheAddress:
+    case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK_NE(address, 0u);
       load_class->SetLoadKindWithAddress(load_kind, address);
       break;
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index d35ae66..7418954 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -35,11 +35,13 @@
   HSharpening(HGraph* graph,
               CodeGenerator* codegen,
               const DexCompilationUnit& compilation_unit,
-              CompilerDriver* compiler_driver)
+              CompilerDriver* compiler_driver,
+              VariableSizedHandleScope* handles)  // Kept in handles_ for creating handles.
       : HOptimization(graph, kSharpeningPassName),
         codegen_(codegen),
         compilation_unit_(compilation_unit),
-        compiler_driver_(compiler_driver) { }
+        compiler_driver_(compiler_driver),
+        handles_(handles) { }
 
   void Run() OVERRIDE;
 
@@ -53,6 +55,7 @@
   CodeGenerator* codegen_;
   const DexCompilationUnit& compilation_unit_;
   CompilerDriver* compiler_driver_;
+  VariableSizedHandleScope* handles_;
 };
 
 }  // namespace art
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index ac24ee9..e7edf96 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -23,6 +23,8 @@
 #include <iterator>
 #include <sys/stat.h>
 
+#include "android-base/strings.h"
+
 #include "common_runtime_test.h"  // For ScratchFile
 #include "utils.h"
 
@@ -221,7 +223,7 @@
     args.push_back("-o");
     args.push_back(to_file);
     args.push_back(from_file);
-    std::string cmd = Join(args, ' ');
+    std::string cmd = android::base::Join(args, ' ');
 
     args.clear();
     args.push_back("/bin/sh");
@@ -257,7 +259,7 @@
     args.push_back(file);
     args.push_back(">");
     args.push_back(file+".dump");
-    std::string cmd = Join(args, ' ');
+    std::string cmd = android::base::Join(args, ' ');
 
     args.clear();
     args.push_back("/bin/sh");
@@ -338,7 +340,7 @@
     args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
     args.push_back(">");
     args.push_back(file+".dis");
-    std::string cmd = Join(args, ' ');
+    std::string cmd = android::base::Join(args, ' ');
 
     args.clear();
     args.push_back("/bin/sh");
@@ -500,7 +502,7 @@
     std::string tmp_file = GetTmpnam();
     args.push_back(">");
     args.push_back(tmp_file);
-    std::string sh_args = Join(args, ' ');
+    std::string sh_args = android::base::Join(args, ' ');
 
     args.clear();
     args.push_back("/bin/sh");
@@ -541,7 +543,7 @@
     args.push_back("sort");
     args.push_back(">");
     args.push_back(tmp_file);
-    std::string sh_args = Join(args, ' ');
+    std::string sh_args = android::base::Join(args, ' ');
 
     args.clear();
     args.push_back("/bin/sh");
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 264be99..5a0f0c6 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -33,6 +33,8 @@
 #include <sys/utsname.h>
 #endif
 
+#include "android-base/strings.h"
+
 #include "arch/instruction_set_features.h"
 #include "arch/mips/instruction_set_features_mips.h"
 #include "art_method-inl.h"
@@ -96,7 +98,7 @@
   for (int i = 0; i < original_argc; ++i) {
     command.push_back(original_argv[i]);
   }
-  return Join(command, ' ');
+  return android::base::Join(command, ' ');
 }
 
 // A stripped version. Remove some less essential parameters. If we see a "--zip-fd=" parameter, be
@@ -108,7 +110,7 @@
   // Do a pre-pass to look for zip-fd.
   bool saw_zip_fd = false;
   for (int i = 0; i < original_argc; ++i) {
-    if (StartsWith(original_argv[i], "--zip-fd=")) {
+    if (android::base::StartsWith(original_argv[i], "--zip-fd=")) {
       saw_zip_fd = true;
       break;
     }
@@ -123,17 +125,17 @@
     }
 
     // Any instruction-setXXX is dropped.
-    if (StartsWith(original_argv[i], "--instruction-set")) {
+    if (android::base::StartsWith(original_argv[i], "--instruction-set")) {
       continue;
     }
 
     // The boot image is dropped.
-    if (StartsWith(original_argv[i], "--boot-image=")) {
+    if (android::base::StartsWith(original_argv[i], "--boot-image=")) {
       continue;
     }
 
     // The image format is dropped.
-    if (StartsWith(original_argv[i], "--image-format=")) {
+    if (android::base::StartsWith(original_argv[i], "--image-format=")) {
       continue;
     }
 
@@ -142,11 +144,11 @@
     // However, we prefer to drop this when we saw --zip-fd.
     if (saw_zip_fd) {
       // Drop anything --zip-X, --dex-X, --oat-X, --swap-X, or --app-image-X
-      if (StartsWith(original_argv[i], "--zip-") ||
-          StartsWith(original_argv[i], "--dex-") ||
-          StartsWith(original_argv[i], "--oat-") ||
-          StartsWith(original_argv[i], "--swap-") ||
-          StartsWith(original_argv[i], "--app-image-")) {
+      if (android::base::StartsWith(original_argv[i], "--zip-") ||
+          android::base::StartsWith(original_argv[i], "--dex-") ||
+          android::base::StartsWith(original_argv[i], "--oat-") ||
+          android::base::StartsWith(original_argv[i], "--swap-") ||
+          android::base::StartsWith(original_argv[i], "--app-image-")) {
         continue;
       }
     }
@@ -159,7 +161,7 @@
     // It seems only "/system/bin/dex2oat" is left, or not even that. Use a pretty line.
     return "Starting dex2oat.";
   }
-  return Join(command, ' ');
+  return android::base::Join(command, ' ');
 }
 
 static void UsageErrorV(const char* fmt, va_list ap) {
@@ -518,6 +520,7 @@
       runtime_(nullptr),
       thread_count_(sysconf(_SC_NPROCESSORS_CONF)),
       start_ns_(NanoTime()),
+      start_cputime_ns_(ProcessCpuNanoTime()),
       oat_fd_(-1),
       input_vdex_fd_(-1),
       output_vdex_fd_(-1),
@@ -998,7 +1001,7 @@
       if (last_dex_dot != std::string::npos) {
         dex_file = dex_file.substr(0, last_dex_dot);
       }
-      if (StartsWith(dex_file, "core-")) {
+      if (android::base::StartsWith(dex_file, "core-")) {
         infix = dex_file.substr(strlen("core"));
       }
     }
@@ -1058,7 +1061,7 @@
         in.insert(last_dot, infix);
       }
     }
-    if (EndsWith(in, ".jar")) {
+    if (android::base::EndsWith(in, ".jar")) {
       in = in.substr(0, in.length() - strlen(".jar")) +
           (replace_suffix != nullptr ? replace_suffix : "");
     }
@@ -1483,7 +1486,7 @@
         for (const gc::space::ImageSpace* image_space : image_spaces) {
           image_filenames.push_back(image_space->GetImageFilename());
         }
-        std::string image_file_location = Join(image_filenames, ':');
+        std::string image_file_location = android::base::Join(image_filenames, ':');
         if (!image_file_location.empty()) {
           key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
         }
@@ -1686,7 +1689,7 @@
               }
             }
 
-            if (StartsWith(dex_location, filter.c_str())) {
+            if (android::base::StartsWith(dex_location, filter.c_str())) {
               VLOG(compiler) << "Disabling inlining from " << dex_file->GetLocation();
               no_inline_from_dex_files_.push_back(dex_file);
               break;
@@ -2361,10 +2364,10 @@
     RuntimeOptions raw_options;
     if (boot_image_filename_.empty()) {
       std::string boot_class_path = "-Xbootclasspath:";
-      boot_class_path += Join(dex_filenames_, ':');
+      boot_class_path += android::base::Join(dex_filenames_, ':');
       raw_options.push_back(std::make_pair(boot_class_path, nullptr));
       std::string boot_class_path_locations = "-Xbootclasspath-locations:";
-      boot_class_path_locations += Join(dex_locations_, ':');
+      boot_class_path_locations += android::base::Join(dex_locations_, ':');
       raw_options.push_back(std::make_pair(boot_class_path_locations, nullptr));
     } else {
       std::string boot_image_option = "-Ximage:";
@@ -2578,7 +2581,7 @@
     while (in_stream.good()) {
       std::string dot;
       std::getline(in_stream, dot);
-      if (StartsWith(dot, "#") || dot.empty()) {
+      if (android::base::StartsWith(dot, "#") || dot.empty()) {
         continue;
       }
       if (process != nullptr) {
@@ -2595,7 +2598,9 @@
     // Note: when creation of a runtime fails, e.g., when trying to compile an app but when there
     //       is no image, there won't be a Runtime::Current().
     // Note: driver creation can fail when loading an invalid dex file.
-    LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
+    LOG(INFO) << "dex2oat took "
+              << PrettyDuration(NanoTime() - start_ns_)
+              << " (" << PrettyDuration(ProcessCpuNanoTime() - start_cputime_ns_) << " cpu)"
               << " (threads: " << thread_count_ << ") "
               << ((Runtime::Current() != nullptr && driver_ != nullptr) ?
                   driver_->GetMemoryUsageString(kIsDebugBuild || VLOG_IS_ON(compiler)) :
@@ -2643,6 +2648,7 @@
 
   size_t thread_count_;
   uint64_t start_ns_;
+  uint64_t start_cputime_ns_;
   std::unique_ptr<WatchDog> watchdog_;
   std::vector<std::unique_ptr<File>> oat_files_;
   std::vector<std::unique_ptr<File>> vdex_files_;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 80c7113..e4462d8 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -26,6 +26,8 @@
 #include <unordered_set>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
@@ -668,6 +670,12 @@
     }
 
   private:
+    // Inserts all of the elements of one container (src) into another (dest).
+    template <typename Dest, typename Src>
+    static void AddAll(Dest& dest, const Src& src) {
+      dest.insert(src.begin(), src.end());
+    }
+
     void WalkClass(const DexFile& dex_file, const DexFile::ClassDef& class_def) {
       const uint8_t* class_data = dex_file.GetClassData(class_def);
       if (class_data == nullptr) {  // empty class such as a marker interface?
@@ -2952,7 +2960,7 @@
           table_index++;
 
           std::string p_name = ptr2->PrettyMethod(true);
-          if (StartsWith(p_name, method.c_str())) {
+          if (android::base::StartsWith(p_name, method.c_str())) {
             std::cerr << "  Slot "
                       << index
                       << " ("
@@ -2965,7 +2973,7 @@
         }
       } else {
         std::string p_name = ptr->PrettyMethod(true);
-        if (StartsWith(p_name, method.c_str())) {
+        if (android::base::StartsWith(p_name, method.c_str())) {
           std::cerr << "  Slot " << index << " (1)" << std::endl;
           std::cerr << "    " << p_name << std::endl;
         } else {
@@ -2978,7 +2986,7 @@
               for (ArtMethod& iface_method : iface->GetMethods(pointer_size)) {
                 if (ImTable::GetImtIndex(&iface_method) == index) {
                   std::string i_name = iface_method.PrettyMethod(true);
-                  if (StartsWith(i_name, method.c_str())) {
+                  if (android::base::StartsWith(i_name, method.c_str())) {
                     std::cerr << "  Slot " << index << " (1)" << std::endl;
                     std::cerr << "    " << p_name << " (" << i_name << ")" << std::endl;
                   }
@@ -2997,7 +3005,7 @@
     while (in_stream.good()) {
       std::string dot;
       std::getline(in_stream, dot);
-      if (StartsWith(dot, "#") || dot.empty()) {
+      if (android::base::StartsWith(dot, "#") || dot.empty()) {
         continue;
       }
       output.push_back(dot);
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index 22db818..a2eba45 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -18,6 +18,8 @@
 #include <string>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "common_runtime_test.h"
 
 #include "base/stringprintf.h"
@@ -143,7 +145,7 @@
       }
       argv.push_back(nullptr);
       UNUSED(execv(argv[0], &argv[0]));
-      const std::string command_line(Join(exec_argv, ' '));
+      const std::string command_line(android::base::Join(exec_argv, ' '));
       PLOG(ERROR) << "Failed to execv(" << command_line << ")";
       // _exit to avoid atexit handlers in child.
       _exit(1);
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index cb5a790..62d1ddf 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -24,6 +24,8 @@
 #include <string>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/dumpable.h"
@@ -286,8 +288,8 @@
       std::string converted_image_filename = space->GetImageLocation();
       std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
       std::string output_image_filename = output_directory +
-                                          (StartsWith(converted_image_filename, "/") ? "" : "/") +
-                                          converted_image_filename;
+          (android::base::StartsWith(converted_image_filename, "/") ? "" : "/") +
+          converted_image_filename;
       std::string output_vdex_filename =
           ImageHeader::GetVdexLocationFromImageLocation(output_image_filename);
       std::string output_oat_filename =
@@ -343,8 +345,8 @@
     std::string converted_image_filename = space->GetImageLocation();
     std::replace(converted_image_filename.begin() + 1, converted_image_filename.end(), '/', '@');
     std::string output_image_filename = output_directory +
-                                        (StartsWith(converted_image_filename, "/") ? "" : "/") +
-                                        converted_image_filename;
+        (android::base::StartsWith(converted_image_filename, "/") ? "" : "/") +
+        converted_image_filename;
     bool new_oat_out;
     std::unique_ptr<File>
         output_image_file(CreateOrOpen(output_image_filename.c_str(), &new_oat_out));
@@ -932,7 +934,7 @@
   for (int i = 0; i < orig_argc; ++i) {
     command.push_back(orig_argv[i]);
   }
-  return Join(command, ' ');
+  return android::base::Join(command, ' ');
 }
 
 static void UsageErrorV(const char* fmt, va_list ap) {
diff --git a/profman/profman.cc b/profman/profman.cc
index bfef834..0b2d172 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -25,6 +25,8 @@
 #include <string>
 #include <vector>
 
+#include "android-base/strings.h"
+
 #include "base/dumpable.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
@@ -48,7 +50,7 @@
   for (int i = 0; i < original_argc; ++i) {
     command.push_back(original_argv[i]);
   }
-  return Join(command, ' ');
+  return android::base::Join(command, ' ');
 }
 
 static constexpr int kInvalidFd = -1;
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 08be5b2..32ebee2 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -517,6 +517,7 @@
         "base/unix_file/fd_file_test.cc",
         "cha_test.cc",
         "class_linker_test.cc",
+        "class_table_test.cc",
         "compiler_filter_test.cc",
         "dex_file_test.cc",
         "dex_file_verifier_test.cc",
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index c81a93c..f264b82 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -24,6 +24,8 @@
 #include "signal.h"
 #include <fstream>
 
+#include "android-base/strings.h"
+
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
 
@@ -271,7 +273,7 @@
   bool has_atomic_ldrd_strd = has_atomic_ldrd_strd_;
   bool has_div = has_div_;
   for (auto i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
+    std::string feature = android::base::Trim(*i);
     if (feature == "div") {
       has_div = true;
     } else if (feature == "-div") {
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 4e7dea3..f7b5a76 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <sstream>
 
+#include "android-base/strings.h"
+
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
@@ -137,7 +139,7 @@
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   bool is_a53 = fix_cortex_a53_835769_;
   for (auto i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
+    std::string feature = android::base::Trim(*i);
     if (feature == "a53") {
       is_a53 = true;
     } else if (feature == "-a53") {
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
index b32391f..db004e7 100644
--- a/runtime/arch/instruction_set_features.cc
+++ b/runtime/arch/instruction_set_features.cc
@@ -16,6 +16,8 @@
 
 #include "instruction_set_features.h"
 
+#include "android-base/strings.h"
+
 #include "base/casts.h"
 #include "utils.h"
 
@@ -224,7 +226,7 @@
       *error_msg = "Unexpected instruction set features after 'default'";
       return std::unique_ptr<const InstructionSetFeatures>();
     }
-    std::string feature = Trim(*it);
+    std::string feature = android::base::Trim(*it);
     bool erase = false;
     if (feature == "default") {
       if (!first) {
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index a95b6f6..a65c967 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <sstream>
 
+#include "android-base/strings.h"
+
 #include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
@@ -210,7 +212,7 @@
   bool mips_isa_gte2 = mips_isa_gte2_;
   bool r6 = r6_;
   for (auto i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
+    std::string feature = android::base::Trim(*i);
     if (feature == "fpu32") {
       fpu_32bit = true;
     } else if (feature == "-fpu32") {
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc
index 490a8d2..e564d1e 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64.cc
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <sstream>
 
+#include "android-base/strings.h"
+
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
 
@@ -105,7 +107,7 @@
   auto i = features.begin();
   if (i != features.end()) {
     // We don't have any features.
-    std::string feature = Trim(*i);
+    std::string feature = android::base::Trim(*i);
     *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
     return nullptr;
   }
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 90b55a9..cc102ec 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -19,6 +19,8 @@
 #include <fstream>
 #include <sstream>
 
+#include "android-base/strings.h"
+
 #include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
@@ -293,7 +295,7 @@
   bool has_AVX2 = has_AVX2_;
   bool has_POPCNT = has_POPCNT_;
   for (auto i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
+    std::string feature = android::base::Trim(*i);
     if (feature == "ssse3") {
       has_SSSE3 = true;
     } else if (feature == "-ssse3") {
diff --git a/runtime/base/time_utils.cc b/runtime/base/time_utils.cc
index 3e5bac8..57f198d 100644
--- a/runtime/base/time_utils.cc
+++ b/runtime/base/time_utils.cc
@@ -167,6 +167,17 @@
 #endif
 }
 
+uint64_t ProcessCpuNanoTime() {  // Process CPU-time clock reading, in nanoseconds.
+#if defined(__linux__)
+  timespec now;
+  clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &now);  // NOTE: the return value is not checked.
+  return static_cast<uint64_t>(now.tv_sec) * UINT64_C(1000000000) + now.tv_nsec;
+#else
+  UNIMPLEMENTED(WARNING);
+  return -1;  // Converts to UINT64_MAX; this is the "unavailable" value.
+#endif
+}
+
 void NanoSleep(uint64_t ns) {
   timespec tm;
   tm.tv_sec = ns / MsToNs(1000);
diff --git a/runtime/base/time_utils.h b/runtime/base/time_utils.h
index 383b52f..dbb8bcd 100644
--- a/runtime/base/time_utils.h
+++ b/runtime/base/time_utils.h
@@ -62,6 +62,9 @@
 // Returns the thread-specific CPU-time clock in nanoseconds or -1 if unavailable.
 uint64_t ThreadCpuNanoTime();
 
+// Returns the process CPU-time clock in nanoseconds, or -1 (as uint64_t) if unavailable.
+uint64_t ProcessCpuNanoTime();
+
 // Converts the given number of nanoseconds to milliseconds.
 static constexpr inline uint64_t NsToMs(uint64_t ns) {
   return ns / 1000 / 1000;
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 862585a..685677b 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -19,6 +19,8 @@
 #include <memory>
 #include <string>
 
+#include "android-base/strings.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
@@ -800,12 +802,12 @@
   jobject jclass_loader = LoadDex("Nested");
   std::vector<const DexFile*> dex_files(GetDexFiles(jclass_loader));
   ASSERT_EQ(dex_files.size(), 1U);
-  EXPECT_TRUE(EndsWith(dex_files[0]->GetLocation(), "Nested.jar"));
+  EXPECT_TRUE(android::base::EndsWith(dex_files[0]->GetLocation(), "Nested.jar"));
 
   jobject jclass_loader2 = LoadDex("MultiDex");
   std::vector<const DexFile*> dex_files2(GetDexFiles(jclass_loader2));
   ASSERT_EQ(dex_files2.size(), 2U);
-  EXPECT_TRUE(EndsWith(dex_files2[0]->GetLocation(), "MultiDex.jar"));
+  EXPECT_TRUE(android::base::EndsWith(dex_files2[0]->GetLocation(), "MultiDex.jar"));
 }
 
 TEST_F(ClassLinkerTest, FindClassNested) {
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index 229cd47..dfe8949 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -71,6 +71,19 @@
   return true;
 }
 
+template <typename Visitor>
+bool ClassTable::Visit(const Visitor& visitor) {  // False iff the visitor returned false.
+  ReaderMutexLock mu(Thread::Current(), lock_);  // Scoped to the whole traversal.
+  for (ClassSet& class_set : classes_) {
+    for (TableSlot& table_slot : class_set) {
+      if (!visitor(table_slot.Read())) {
+        return false;  // Stop at the first class the visitor rejects.
+      }
+    }
+  }
+  return true;
+}
+
 template<ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ClassTable::TableSlot::Read() const {
   const uint32_t before = data_.LoadRelaxed();
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index ec33e5e..0f985c6 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -33,8 +33,9 @@
 
 bool ClassTable::Contains(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
+  TableSlot slot(klass);
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.Find(TableSlot(klass));
+    auto it = class_set.Find(slot);
     if (it != class_set.end()) {
       return it->Read() == klass;
     }
@@ -44,8 +45,9 @@
 
 mirror::Class* ClassTable::LookupByDescriptor(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
+  TableSlot slot(klass);
   for (ClassSet& class_set : classes_) {
-    auto it = class_set.Find(TableSlot(klass));
+    auto it = class_set.Find(slot);
     if (it != class_set.end()) {
       return it->Read();
     }
@@ -110,8 +112,8 @@
 }
 
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
-  ReaderMutexLock mu(Thread::Current(), lock_);
   DescriptorHashPair pair(descriptor, hash);
+  ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.FindWithHash(pair, hash);
     if (it != class_set.end()) {
@@ -122,12 +124,14 @@
 }
 
 void ClassTable::Insert(ObjPtr<mirror::Class> klass) {
+  const uint32_t hash = TableSlot::HashDescriptor(klass);  // Computed before lock_ is taken.
   WriterMutexLock mu(Thread::Current(), lock_);
-  classes_.back().Insert(TableSlot(klass));
+  classes_.back().InsertWithHash(TableSlot(klass, hash), hash);  // One hash for slot and set.
 }
 
 void ClassTable::InsertWithoutLocks(ObjPtr<mirror::Class> klass) {
-  classes_.back().Insert(TableSlot(klass));
+  const uint32_t hash = TableSlot::HashDescriptor(klass);  // Hash the descriptor once.
+  classes_.back().InsertWithHash(TableSlot(klass, hash), hash);  // One hash for slot and set.
 }
 
 void ClassTable::InsertWithHash(ObjPtr<mirror::Class> klass, size_t hash) {
@@ -136,8 +140,8 @@
 }
 
 bool ClassTable::Remove(const char* descriptor) {
-  WriterMutexLock mu(Thread::Current(), lock_);
   DescriptorHashPair pair(descriptor, ComputeModifiedUtf8Hash(descriptor));
+  WriterMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(pair);
     if (it != class_set.end()) {
@@ -250,10 +254,12 @@
   strong_roots_.clear();
 }
 
-ClassTable::TableSlot::TableSlot(ObjPtr<mirror::Class> klass) {
+ClassTable::TableSlot::TableSlot(ObjPtr<mirror::Class> klass)
+    : TableSlot(klass, HashDescriptor(klass)) {}  // Delegates to the (klass, hash) constructor.
+
+uint32_t ClassTable::TableSlot::HashDescriptor(ObjPtr<mirror::Class> klass) {
   std::string temp;
-  data_.StoreRelaxed(Encode(klass.Ptr(),
-                            MaskHash(ComputeModifiedUtf8Hash(klass->GetDescriptor(&temp)))));
+  return ComputeModifiedUtf8Hash(klass->GetDescriptor(&temp));  // MaskHash is not applied here.
 }
 
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 104871f..f27d809 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -73,6 +73,9 @@
       return MaskHash(other) == Hash();
     }
 
+    static uint32_t HashDescriptor(ObjPtr<mirror::Class> klass)  // Hash of klass's descriptor.
+        REQUIRES_SHARED(Locks::mutator_lock_);
+
     template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
     mirror::Class* Read() const REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -174,6 +177,10 @@
   bool Visit(Visitor& visitor)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  template <typename Visitor>
+  bool Visit(const Visitor& visitor)  // Const-ref overload; accepts temporaries such as lambdas.
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor. Returns null if there are none.
   mirror::Class* Lookup(const char* descriptor, size_t hash)
diff --git a/runtime/class_table_test.cc b/runtime/class_table_test.cc
new file mode 100644
index 0000000..f1248eb
--- /dev/null
+++ b/runtime/class_table_test.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_table-inl.h"
+
+#include "art_field-inl.h"
+#include "art_method-inl.h"
+#include "class_linker-inl.h"
+#include "common_runtime_test.h"
+#include "dex_file.h"
+#include "gc/accounting/card_table-inl.h"
+#include "gc/heap.h"
+#include "handle_scope-inl.h"
+#include "mirror/class-inl.h"
+#include "obj_ptr.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace art {
+namespace mirror {
+
+class CollectRootVisitor {  // Root visitor that records every visited root in roots_.
+ public:
+  CollectRootVisitor() {}
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(GcRoot<MirrorType>& root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!root.IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<MirrorType>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!root->IsNull()) {
+      VisitRoot(root);
+    }
+  }
+
+  template <class MirrorType>
+  void VisitRoot(GcRoot<MirrorType>& root) const REQUIRES_SHARED(Locks::mutator_lock_) {
+    VisitRoot(root.AddressWithoutBarrier());  // Forwards to the pointer overload below.
+  }
+
+  template <class MirrorType>
+  void VisitRoot(mirror::CompressedReference<MirrorType>* root) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    roots_.insert(root->AsMirrorPtr());  // Record the object pointer held by the root.
+  }
+
+  mutable std::set<mirror::Object*> roots_;  // mutable: the const Visit* methods insert into it.
+};
+
+
+class ClassTableTest : public CommonRuntimeTest {};  // Fixture; adds nothing of its own.
+
+TEST_F(ClassTableTest, ClassTable) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("XandY");
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<ClassLoader> class_loader(hs.NewHandle(soa.Decode<ClassLoader>(jclass_loader)));
+  const char* descriptor_x = "LX;";
+  const char* descriptor_y = "LY;";
+  Handle<mirror::Class> h_X(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), descriptor_x, class_loader)));
+  Handle<mirror::Class> h_Y(
+      hs.NewHandle(class_linker_->FindClass(soa.Self(), descriptor_y, class_loader)));
+  Handle<mirror::Object> obj_X = hs.NewHandle(h_X->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj_X.Get() != nullptr);
+  ClassTable table;
+  EXPECT_EQ(table.NumZygoteClasses(class_loader.Get()), 0u);
+  EXPECT_EQ(table.NumNonZygoteClasses(class_loader.Get()), 0u);
+
+  // Add h_X to the class table.
+  table.Insert(h_X.Get());
+  EXPECT_EQ(table.LookupByDescriptor(h_X.Get()), h_X.Get());
+  EXPECT_EQ(table.Lookup(descriptor_x, ComputeModifiedUtf8Hash(descriptor_x)), h_X.Get());
+  EXPECT_EQ(table.Lookup("NOT_THERE", ComputeModifiedUtf8Hash("NOT_THERE")), nullptr);
+  EXPECT_EQ(table.NumZygoteClasses(class_loader.Get()), 0u);
+  EXPECT_EQ(table.NumNonZygoteClasses(class_loader.Get()), 1u);
+
+  // Create the zygote snapshot and ensure the accounting is correct.
+  table.FreezeSnapshot();
+  EXPECT_EQ(table.NumZygoteClasses(class_loader.Get()), 1u);
+  EXPECT_EQ(table.NumNonZygoteClasses(class_loader.Get()), 0u);
+
+  // Test inserting and related lookup functions.
+  EXPECT_EQ(table.LookupByDescriptor(h_Y.Get()), nullptr);
+  EXPECT_FALSE(table.Contains(h_Y.Get()));
+  table.Insert(h_Y.Get());
+  EXPECT_EQ(table.LookupByDescriptor(h_X.Get()), h_X.Get());
+  EXPECT_EQ(table.LookupByDescriptor(h_Y.Get()), h_Y.Get());
+  EXPECT_TRUE(table.Contains(h_X.Get()));
+  EXPECT_TRUE(table.Contains(h_Y.Get()));
+
+  EXPECT_EQ(table.NumZygoteClasses(class_loader.Get()), 1u);
+  EXPECT_EQ(table.NumNonZygoteClasses(class_loader.Get()), 1u);
+
+  // Test adding / clearing strong roots.
+  EXPECT_TRUE(table.InsertStrongRoot(obj_X.Get()));
+  EXPECT_FALSE(table.InsertStrongRoot(obj_X.Get()));
+  table.ClearStrongRoots();
+  EXPECT_TRUE(table.InsertStrongRoot(obj_X.Get()));
+
+  // Collect all the roots and make sure there is nothing missing.
+  CollectRootVisitor roots;
+  table.VisitRoots(roots);
+  EXPECT_TRUE(roots.roots_.find(h_X.Get()) != roots.roots_.end());
+  EXPECT_TRUE(roots.roots_.find(h_Y.Get()) != roots.roots_.end());
+  EXPECT_TRUE(roots.roots_.find(obj_X.Get()) != roots.roots_.end());
+
+  // Check that visiting only classes works.
+  std::set<mirror::Class*> classes;
+  table.Visit([&classes](ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+    classes.insert(klass.Ptr());
+    return true;
+  });
+  EXPECT_TRUE(classes.find(h_X.Get()) != classes.end());
+  EXPECT_TRUE(classes.find(h_Y.Get()) != classes.end());
+  EXPECT_EQ(classes.size(), 2u);
+  classes.clear();
+  table.Visit([&classes](ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+    classes.insert(klass.Ptr());
+    // Return false to exit the Visit early.
+    return false;
+  });
+  EXPECT_EQ(classes.size(), 1u);
+
+  // Test remove.
+  table.Remove(descriptor_x);
+  EXPECT_FALSE(table.Contains(h_X.Get()));
+
+  // Test that WriteToMemory and ReadFromMemory work.
+  table.Insert(h_X.Get());
+  const size_t count = table.WriteToMemory(nullptr);  // nullptr: used only to get the size.
+  std::unique_ptr<uint8_t[]> buffer(new uint8_t[count]());
+  ASSERT_EQ(table.WriteToMemory(&buffer[0]), count);
+  ClassTable table2;
+  size_t count2 = table2.ReadFromMemory(&buffer[0]);
+  EXPECT_EQ(count, count2);
+  // Strong roots are not serialized, only classes.
+  EXPECT_TRUE(table2.Contains(h_X.Get()));
+  EXPECT_TRUE(table2.Contains(h_Y.Get()));
+
+  // TODO: Add tests for UpdateClass, InsertOatFile.
+}
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 2ea7bb6..ee0f340 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -20,6 +20,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "android-base/strings.h"
+
 #include "arch/instruction_set.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
@@ -1451,7 +1453,7 @@
       section_headers_original_indexes.push_back(0);
       continue;
     }
-    if (StartsWith(name, ".debug")
+    if (android::base::StartsWith(name, ".debug")
         || (strcmp(name, ".strtab") == 0)
         || (strcmp(name, ".symtab") == 0)) {
       continue;
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index fbab73f..b889913 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1360,9 +1360,10 @@
         << " is_marked=" << IsMarked(to_ref);
   }
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
+  mirror::Object* referent = nullptr;
   if (UNLIKELY((to_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()->IsTypeOfReferenceClass() &&
-                to_ref->AsReference()->GetReferent<kWithoutReadBarrier>() != nullptr &&
-                !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
+                (referent = to_ref->AsReference()->GetReferent<kWithoutReadBarrier>()) != nullptr &&
+                !IsInToSpace(referent)))) {
     // Leave this reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to white later in ReferenceQueue::DequeuePendingReference().
     DCHECK(to_ref->AsReference()->GetPendingNext() != nullptr) << "Left unenqueued ref gray " << to_ref;
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index c726944..76f3692 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -22,6 +22,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "android-base/strings.h"
+
 #include "art_method.h"
 #include "base/enums.h"
 #include "base/macros.h"
@@ -137,7 +139,7 @@
     arg_vector.push_back(compiler_options[i].c_str());
   }
 
-  std::string command_line(Join(arg_vector, ' '));
+  std::string command_line(android::base::Join(arg_vector, ' '));
   LOG(INFO) << "GenerateImage: " << command_line;
   return Exec(arg_vector, error_msg);
 }
@@ -257,7 +259,7 @@
   argv.push_back(instruction_set_arg);
   argv.push_back(base_offset_arg);
 
-  std::string command_line(Join(argv, ' '));
+  std::string command_line(android::base::Join(argv, ' '));
   LOG(INFO) << "RelocateImage: " << command_line;
   return Exec(argv, error_msg);
 }
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index b795409..79e80f1 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -86,6 +86,22 @@
   return os;
 }
 
+// Not all combinations of flags are valid. You may not visit all roots as well as the new roots
+// (no logical reason to do this). You also may not start logging new roots and stop logging new
+// roots (also no logical reason to do this).
+//
+// The precise flag ensures that more metadata is supplied. An example is vreg data for compiled
+// method frames.
+enum VisitRootFlags : uint8_t {
+  kVisitRootFlagAllRoots = 0x1,
+  kVisitRootFlagNewRoots = 0x2,
+  kVisitRootFlagStartLoggingNewRoots = 0x4,
+  kVisitRootFlagStopLoggingNewRoots = 0x8,
+  kVisitRootFlagClearRootLog = 0x10,
+  kVisitRootFlagClassLoader = 0x20,
+  kVisitRootFlagPrecise = 0x80,
+};
+
 class RootVisitor {
  public:
   virtual ~RootVisitor() { }
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 52eacd5..b0d7fb2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -92,6 +92,16 @@
     }                                                                                          \
   } while (false)
 
+#define HANDLE_BACKWARD_BRANCH(offset)                                                         \
+  do {                                                                                         \
+    if (IsBackwardBranch(offset)) {                                                            \
+      HOTNESS_UPDATE();                                                                        \
+      /* Record new dex pc early to have consistent suspend point at loop header. */           \
+      shadow_frame.SetDexPC(inst->GetDexPc(insns));                                            \
+      self->AllowThreadSuspension();                                                           \
+    }                                                                                          \
+  } while (false)
+
 template<bool do_access_check, bool transaction_active>
 JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register,
@@ -594,55 +604,40 @@
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
         BRANCH_INSTRUMENTATION(offset);
-        if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
-          self->AllowThreadSuspension();
-        }
         inst = inst->RelativeAt(offset);
+        HANDLE_BACKWARD_BRANCH(offset);
         break;
       }
       case Instruction::GOTO_16: {
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
         BRANCH_INSTRUMENTATION(offset);
-        if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
-          self->AllowThreadSuspension();
-        }
         inst = inst->RelativeAt(offset);
+        HANDLE_BACKWARD_BRANCH(offset);
         break;
       }
       case Instruction::GOTO_32: {
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
         BRANCH_INSTRUMENTATION(offset);
-        if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
-          self->AllowThreadSuspension();
-        }
         inst = inst->RelativeAt(offset);
+        HANDLE_BACKWARD_BRANCH(offset);
         break;
       }
       case Instruction::PACKED_SWITCH: {
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
-        if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
-          self->AllowThreadSuspension();
-        }
         inst = inst->RelativeAt(offset);
+        HANDLE_BACKWARD_BRANCH(offset);
         break;
       }
       case Instruction::SPARSE_SWITCH: {
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         BRANCH_INSTRUMENTATION(offset);
-        if (IsBackwardBranch(offset)) {
-          HOTNESS_UPDATE();
-          self->AllowThreadSuspension();
-        }
         inst = inst->RelativeAt(offset);
+        HANDLE_BACKWARD_BRANCH(offset);
         break;
       }
 
@@ -739,11 +734,8 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -756,11 +748,8 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -773,11 +762,8 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -790,11 +776,8 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -807,11 +790,8 @@
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -824,11 +804,8 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -840,11 +817,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -856,11 +830,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -872,11 +843,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -888,11 +856,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -904,11 +869,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
@@ -920,11 +882,8 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
           BRANCH_INSTRUMENTATION(offset);
-          if (IsBackwardBranch(offset)) {
-            HOTNESS_UPDATE();
-            self->AllowThreadSuspension();
-          }
           inst = inst->RelativeAt(offset);
+          HANDLE_BACKWARD_BRANCH(offset);
         } else {
           BRANCH_INSTRUMENTATION(2);
           inst = inst->Next_2xx();
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 93f50ad..1b0ad83 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -297,10 +297,11 @@
     ObjPtr<mirror::Object> object = roots->Get(i);
     if (kIsDebugBuild) {
       // Ensure the string is strongly interned. b/32995596
-      CHECK(object->IsString());
-      ObjPtr<mirror::String> str = reinterpret_cast<mirror::String*>(object.Ptr());
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      CHECK(class_linker->GetInternTable()->LookupStrong(Thread::Current(), str) != nullptr);
+      if (object->IsString()) {
+        ObjPtr<mirror::String> str = reinterpret_cast<mirror::String*>(object.Ptr());
+        ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        CHECK(class_linker->GetInternTable()->LookupStrong(Thread::Current(), str) != nullptr);
+      }
     }
     gc_roots[i] = GcRoot<mirror::Object>(object);
   }
@@ -316,6 +317,31 @@
   return data - ComputeRootTableSize(roots);
 }
 
+// GC helper for a weak class root: clears it if its loader is unmarked, else follows a move.
+static inline void ProcessWeakClass(GcRoot<mirror::Class>* root_ptr, IsMarkedVisitor* visitor)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // No read barrier is needed here because this is only called by the GC.
+  mirror::Class* cls = root_ptr->Read<kWithoutReadBarrier>();
+  if (cls != nullptr) {
+    DCHECK((cls->IsClass<kDefaultVerifyFlags, kWithoutReadBarrier>()));
+    // Use the class loader of the class to decide whether the class has been unloaded.
+    // No read barrier is needed here because this is only called by the GC.
+    mirror::Object* class_loader =
+        cls->GetClassLoader<kDefaultVerifyFlags, kWithoutReadBarrier>();
+    if (class_loader == nullptr || visitor->IsMarked(class_loader) != nullptr) {
+      // A null or marked loader is treated as live; update the entry if the class has moved.
+      mirror::Class* new_cls = down_cast<mirror::Class*>(visitor->IsMarked(cls));
+      // Note that new_cls can be null for CMS and newly allocated objects.
+      if (new_cls != nullptr && new_cls != cls) {
+        *root_ptr = GcRoot<mirror::Class>(new_cls);
+      }
+    } else {
+      // The class loader is not marked (not live), so clear the entry.
+      *root_ptr = GcRoot<mirror::Class>(nullptr);
+    }
+  }
+}
+
 void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
   MutexLock mu(Thread::Current(), lock_);
   for (const auto& entry : method_code_map_) {
@@ -325,17 +351,22 @@
     for (uint32_t i = 0; i < number_of_roots; ++i) {
       // This does not need a read barrier because this is called by GC.
       mirror::Object* object = roots[i].Read<kWithoutReadBarrier>();
-      DCHECK(object != nullptr);
-      mirror::Object* new_object = visitor->IsMarked(object);
-      // We know the string is marked because it's a strongly-interned string that
-      // is always alive. The IsMarked implementation of the CMS collector returns
-      // null for newly allocated objects, but we know those haven't moved. Therefore,
-      // only update the entry if we get a different non-null string.
-      // TODO: Do not use IsMarked for j.l.Class, and adjust once we move this method
-      // out of the weak access/creation pause. b/32167580
-      if (new_object != nullptr && new_object != object) {
-        DCHECK(new_object->IsString());
-        roots[i] = GcRoot<mirror::Object>(new_object);
+      if (object == nullptr) {
+        // The entry was cleared in a previous sweep.
+      } else if (object->IsString<kDefaultVerifyFlags, kWithoutReadBarrier>()) {
+        mirror::Object* new_object = visitor->IsMarked(object);
+        // We know the string is marked because it's a strongly-interned string that
+        // is always alive. The IsMarked implementation of the CMS collector returns
+        // null for newly allocated objects, but we know those haven't moved. Therefore,
+        // only update the entry if we get a different non-null string.
+        // TODO: Do not use IsMarked for j.l.Class, and adjust once we move this method
+        // out of the weak access/creation pause. b/32167580
+        if (new_object != nullptr && new_object != object) {
+          DCHECK(new_object->IsString());
+          roots[i] = GcRoot<mirror::Object>(new_object);
+        }
+      } else {
+        ProcessWeakClass(reinterpret_cast<GcRoot<mirror::Class>*>(&roots[i]), visitor);
       }
     }
   }
@@ -344,26 +375,7 @@
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       InlineCache* cache = &info->cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
-        // This does not need a read barrier because this is called by GC.
-        mirror::Class* cls = cache->classes_[j].Read<kWithoutReadBarrier>();
-        if (cls != nullptr) {
-          // Look at the classloader of the class to know if it has been
-          // unloaded.
-          // This does not need a read barrier because this is called by GC.
-          mirror::Object* class_loader =
-              cls->GetClassLoader<kDefaultVerifyFlags, kWithoutReadBarrier>();
-          if (class_loader == nullptr || visitor->IsMarked(class_loader) != nullptr) {
-            // The class loader is live, update the entry if the class has moved.
-            mirror::Class* new_cls = down_cast<mirror::Class*>(visitor->IsMarked(cls));
-            // Note that new_object can be null for CMS and newly allocated objects.
-            if (new_cls != nullptr && new_cls != cls) {
-              cache->classes_[j] = GcRoot<mirror::Class>(new_cls);
-            }
-          } else {
-            // The class loader is not live, clear the entry.
-            cache->classes_[j] = GcRoot<mirror::Class>(nullptr);
-          }
-        }
+        ProcessWeakClass(&cache->classes_[j], visitor);
       }
     }
   }
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 11d601e..025d10c 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -20,6 +20,8 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 
+#include "android-base/strings.h"
+
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/systrace.h"
@@ -412,7 +414,7 @@
   }
 
   VLOG(profiler) << "Starting profile saver using output file: " << output_filename
-      << ". Tracking: " << Join(code_paths_to_profile, ':');
+      << ". Tracking: " << android::base::Join(code_paths_to_profile, ':');
 
   instance_ = new ProfileSaver(options,
                                output_filename,
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index be8815a..a59bb7b 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -71,6 +71,8 @@
 }
 
 inline Class* DexCache::GetResolvedType(dex::TypeIndex type_idx) {
+  // It is theorized that a load acquire is not required since obtaining the resolved class will
+  // always have an address dependency or a lock.
   DCHECK_LT(type_idx.index_, NumResolvedTypes());
   return GetResolvedTypes()[type_idx.index_].Read();
 }
@@ -78,7 +80,11 @@
 inline void DexCache::SetResolvedType(dex::TypeIndex type_idx, ObjPtr<Class> resolved) {
   DCHECK_LT(type_idx.index_, NumResolvedTypes());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   // TODO default transaction support.
-  GetResolvedTypes()[type_idx.index_] = GcRoot<Class>(resolved);
+  // Use a release store for SetResolvedType. This is done to prevent other threads from seeing a
+  // class but not necessarily seeing the loaded members like the static fields array.
+  // See b/32075261.
+  reinterpret_cast<Atomic<GcRoot<mirror::Class>>&>(GetResolvedTypes()[type_idx.index_]).
+      StoreRelease(GcRoot<Class>(resolved));
   // TODO: Fine-grained marking, so that we don't need to go through all arrays in full.
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(this);
 }
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 6870fda..95516ac 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -305,8 +305,11 @@
 
 template<typename MemoryType>
 bool String::AllASCII(const MemoryType* const chars, const int length) {
+  static_assert(std::is_unsigned<MemoryType>::value, "Expecting unsigned MemoryType");
   for (int i = 0; i < length; ++i) {
-    if (chars[i] >= 0x80) {
+    // Valid ASCII characters are in range 1..0x7f. Zero is not considered ASCII
+    // because it would complicate the detection of ASCII strings in Modified-UTF8.
+    if ((chars[i] - 1u) >= 0x7fu) {
       return false;
     }
   }
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 6a62a16..7f7b1b5 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -19,6 +19,9 @@
 #include <sstream>
 
 #include <sys/stat.h>
+
+#include "android-base/strings.h"
+
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "compiler_filter.h"
@@ -456,7 +459,7 @@
   argv.push_back("--output-oat-file=" + oat_file_name);
   argv.push_back("--patched-image-location=" + image_info->location);
 
-  std::string command_line(Join(argv, ' '));
+  std::string command_line(android::base::Join(argv, ' '));
   if (!Exec(argv, error_msg)) {
     // Manually delete the file. This ensures there is no garbage left over if
     // the process unexpectedly died.
@@ -605,7 +608,7 @@
 
   argv.insert(argv.end(), args.begin(), args.end());
 
-  std::string command_line(Join(argv, ' '));
+  std::string command_line(android::base::Join(argv, ' '));
   return Exec(argv, error_msg);
 }
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 94c12af..26dbaab 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -20,6 +20,7 @@
 #include <vector>
 #include <sys/param.h>
 
+#include "android-base/strings.h"
 #include <backtrace/BacktraceMap.h>
 #include <gtest/gtest.h>
 
@@ -1057,7 +1058,7 @@
 
   // Reverse again to get the right path order, and join to get the result.
   std::reverse(target_path.begin(), target_path.end());
-  return Join(target_path, '/');
+  return android::base::Join(target_path, '/');
 }
 
 // Case: Non-absolute path to Dex location.
@@ -1134,7 +1135,7 @@
         /*dex_elements*/nullptr,
         &oat_file,
         &error_msgs);
-    CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
+    CHECK(!dex_files.empty()) << android::base::Join(error_msgs, '\n');
     CHECK(dex_files[0]->GetOatDexFile() != nullptr) << dex_files[0]->GetLocation();
     loaded_oat_file_ = dex_files[0]->GetOatDexFile()->GetOatFile();
     CHECK_EQ(loaded_oat_file_, oat_file);
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 5f18b7c..7b2521d 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -174,10 +174,11 @@
 class FollowReferencesHelper FINAL {
  public:
   FollowReferencesHelper(HeapUtil* h,
-                         art::ObjPtr<art::mirror::Object> initial_object ATTRIBUTE_UNUSED,
+                         art::ObjPtr<art::mirror::Object> initial_object,
                          const jvmtiHeapCallbacks* callbacks,
                          const void* user_data)
       : tag_table_(h->GetTags()),
+        initial_object_(initial_object),
         callbacks_(callbacks),
         user_data_(user_data),
         start_(0),
@@ -187,13 +188,23 @@
   void Init()
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
-    CollectAndReportRootsVisitor carrv(this, tag_table_, &worklist_, &visited_);
-    art::Runtime::Current()->VisitRoots(&carrv);
-    art::Runtime::Current()->VisitImageRoots(&carrv);
-    stop_reports_ = carrv.IsStopReports();
+    if (initial_object_.IsNull()) {
+      CollectAndReportRootsVisitor carrv(this, tag_table_, &worklist_, &visited_);
 
-    if (stop_reports_) {
-      worklist_.clear();
+      // We need precise info (e.g., vregs).
+      constexpr art::VisitRootFlags kRootFlags = static_cast<art::VisitRootFlags>(
+          art::VisitRootFlags::kVisitRootFlagAllRoots | art::VisitRootFlags::kVisitRootFlagPrecise);
+      art::Runtime::Current()->VisitRoots(&carrv, kRootFlags);
+
+      art::Runtime::Current()->VisitImageRoots(&carrv);
+      stop_reports_ = carrv.IsStopReports();
+
+      if (stop_reports_) {
+        worklist_.clear();
+      }
+    } else {
+      visited_.insert(initial_object_.Ptr());
+      worklist_.push_back(initial_object_.Ptr());
     }
   }
 
@@ -316,7 +327,36 @@
         }
 
         case art::RootType::kRootJavaFrame:
+        {
+          uint32_t thread_id = info.GetThreadId();
+          ref_info->stack_local.thread_id = thread_id;
+
+          art::Thread* thread = FindThread(info);
+          if (thread != nullptr) {
+            art::mirror::Object* thread_obj = thread->GetPeer();
+            if (thread->IsStillStarting()) {
+              thread_obj = nullptr;
+            } else {
+              thread_obj = thread->GetPeer();
+            }
+            if (thread_obj != nullptr) {
+              ref_info->stack_local.thread_tag = tag_table_->GetTagOrZero(thread_obj);
+            }
+          }
+
+          auto& java_info = static_cast<const art::JavaFrameRootInfo&>(info);
+          ref_info->stack_local.slot = static_cast<jint>(java_info.GetVReg());
+          const art::StackVisitor* visitor = java_info.GetVisitor();
+          ref_info->stack_local.location =
+              static_cast<jlocation>(visitor->GetDexPc(false /* abort_on_failure */));
+          ref_info->stack_local.depth = static_cast<jint>(visitor->GetFrameDepth());
+          art::ArtMethod* method = visitor->GetMethod();
+          if (method != nullptr) {
+            ref_info->stack_local.method = art::jni::EncodeArtMethod(method);
+          }
+
           return JVMTI_HEAP_REFERENCE_STACK_LOCAL;
+        }
 
         case art::RootType::kRootNativeStack:
         case art::RootType::kRootThreadBlock:
@@ -616,6 +656,7 @@
   }
 
   ObjectTagTable* tag_table_;
+  art::ObjPtr<art::mirror::Object> initial_object_;
   const jvmtiHeapCallbacks* callbacks_;
   const void* user_data_;
 
@@ -646,20 +687,28 @@
   }
 
   art::Thread* self = art::Thread::Current();
-  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
 
-  art::Runtime::Current()->GetHeap()->IncrementDisableMovingGC(self);
+  art::gc::Heap* heap = art::Runtime::Current()->GetHeap();
+  if (heap->IsGcConcurrentAndMoving()) {
+    // Need to follow references while GC isn't running. See the
+    // comment in Heap::VisitObjects().
+    heap->IncrementDisableMovingGC(self);
+  }
   {
-    art::ObjPtr<art::mirror::Object> o_initial = soa.Decode<art::mirror::Object>(initial_object);
-
+    art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
     art::ScopedThreadSuspension sts(self, art::kWaitingForVisitObjects);
     art::ScopedSuspendAll ssa("FollowReferences");
 
-    FollowReferencesHelper frh(this, o_initial, callbacks, user_data);
+    FollowReferencesHelper frh(this,
+                               self->DecodeJObject(initial_object),
+                               callbacks,
+                               user_data);
     frh.Init();
     frh.Work();
   }
-  art::Runtime::Current()->GetHeap()->DecrementDisableMovingGC(self);
+  if (heap->IsGcConcurrentAndMoving()) {
+    heap->DecrementDisableMovingGC(self);
+  }
 
   return ERR(NONE);
 }
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 6f8976f..579fb50 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -67,14 +67,10 @@
       m = m->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
       jmethodID id = art::jni::EncodeArtMethod(m);
 
-      art::mirror::DexCache* dex_cache = m->GetDexCache();
-      int32_t line_number = -1;
-      if (dex_cache != nullptr) {  // be tolerant of bad input
-        const art::DexFile* dex_file = dex_cache->GetDexFile();
-        line_number = art::annotations::GetLineNumFromPC(dex_file, m, GetDexPc(false));
-      }
+      uint32_t dex_pc = GetDexPc(false);
+      jlong dex_location = (dex_pc == art::DexFile::kDexNoIndex) ? -1 : static_cast<jlong>(dex_pc);
 
-      jvmtiFrameInfo info = { id, static_cast<jlong>(line_number) };
+      jvmtiFrameInfo info = { id, dex_location };
       frames.push_back(info);
 
       if (stop == 1) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index bf34548..59c5961 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -37,6 +37,8 @@
 #include <vector>
 #include <fcntl.h>
 
+#include "android-base/strings.h"
+
 #include "JniConstants.h"
 #include "ScopedLocalRef.h"
 #include "arch/arm/quick_method_frame_info_arm.h"
@@ -869,7 +871,7 @@
         ImageHeader::GetOatLocationFromImageLocation(image_locations[index].c_str());
     // Note: in the multi-image case, the image location may end in ".jar," and not ".art." Handle
     //       that here.
-    if (EndsWith(oat_location, ".jar")) {
+    if (android::base::EndsWith(oat_location, ".jar")) {
       oat_location.replace(oat_location.length() - 3, 3, "oat");
     }
     std::string error_msg;
@@ -1225,7 +1227,7 @@
       for (const DexFile* dex_file : boot_class_path) {
         dex_locations.push_back(dex_file->GetLocation());
       }
-      boot_class_path_string_ = Join(dex_locations, ':');
+      boot_class_path_string_ = android::base::Join(dex_locations, ':');
     }
     {
       ScopedTrace trace2("AddImageStringsToTable");
@@ -1695,13 +1697,13 @@
   VisitTransactionRoots(visitor);
 }
 
-void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor) {
-  thread_list_->VisitRoots(visitor);
+void Runtime::VisitNonConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  VisitThreadRoots(visitor, flags);
   VisitNonThreadRoots(visitor);
 }
 
-void Runtime::VisitThreadRoots(RootVisitor* visitor) {
-  thread_list_->VisitRoots(visitor);
+void Runtime::VisitThreadRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  thread_list_->VisitRoots(visitor, flags);
 }
 
 size_t Runtime::FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
@@ -1710,7 +1712,7 @@
 }
 
 void Runtime::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
-  VisitNonConcurrentRoots(visitor);
+  VisitNonConcurrentRoots(visitor, flags);
   VisitConcurrentRoots(visitor, flags);
 }
 
@@ -1892,7 +1894,7 @@
   }
 
   VLOG(profiler) << "Register app with " << profile_output_filename
-      << " " << Join(code_paths, ':');
+      << " " << android::base::Join(code_paths, ':');
 
   if (profile_output_filename.empty()) {
     LOG(WARNING) << "JIT profile information will not be recorded: profile filename is empty.";
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e6b3128..d40c631 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -100,18 +100,6 @@
 
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
 
-// Not all combinations of flags are valid. You may not visit all roots as well as the new roots
-// (no logical reason to do this). You also may not start logging new roots and stop logging new
-// roots (also no logical reason to do this).
-enum VisitRootFlags : uint8_t {
-  kVisitRootFlagAllRoots = 0x1,
-  kVisitRootFlagNewRoots = 0x2,
-  kVisitRootFlagStartLoggingNewRoots = 0x4,
-  kVisitRootFlagStopLoggingNewRoots = 0x8,
-  kVisitRootFlagClearRootLog = 0x10,
-  kVisitRootFlagClassLoader = 0x20,
-};
-
 class Runtime {
  public:
   // Parse raw runtime options.
@@ -349,28 +337,16 @@
   void VisitTransactionRoots(RootVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Visit all of the thread roots.
-  void VisitThreadRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Flip thread roots from from-space refs to to-space refs.
   size_t FlipThreadRoots(Closure* thread_flip_visitor, Closure* flip_callback,
                          gc::collector::GarbageCollector* collector)
       REQUIRES(!Locks::mutator_lock_);
 
-  // Visit all other roots which must be done with mutators suspended.
-  void VisitNonConcurrentRoots(RootVisitor* visitor)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Sweep system weaks, the system weak is deleted if the visitor return null. Otherwise, the
   // system weak is updated to be the visitor's returned value.
   void SweepSystemWeaks(IsMarkedVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Constant roots are the roots which never change after the runtime is initialized, they only
-  // need to be visited once per GC cycle.
-  void VisitConstantRoots(RootVisitor* visitor)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Returns a special method that calls into a trampoline for runtime method resolution
   ArtMethod* GetResolutionMethod();
 
@@ -702,6 +678,19 @@
 
   void MaybeSaveJitProfilingInfo();
 
+  // Visit all of the thread roots.
+  void VisitThreadRoots(RootVisitor* visitor, VisitRootFlags flags)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Visit all other roots which must be done with mutators suspended.
+  void VisitNonConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Constant roots are the roots which never change after the runtime is initialized; they only
+  // need to be visited once per GC cycle.
+  void VisitConstantRoots(RootVisitor* visitor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index f20aa20..792da88 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -614,12 +614,6 @@
   return result;
 }
 
-static instrumentation::InstrumentationStackFrame& GetInstrumentationStackFrame(Thread* thread,
-                                                                                uint32_t depth) {
-  CHECK_LT(depth, thread->GetInstrumentationStack()->size());
-  return thread->GetInstrumentationStack()->at(depth);
-}
-
 static void AssertPcIsWithinQuickCode(ArtMethod* method, uintptr_t pc)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (method->IsNative() || method->IsRuntimeMethod() || method->IsProxyMethod()) {
@@ -777,6 +771,7 @@
   return QuickMethodFrameInfo(frame_size, callee_info.CoreSpillMask(), callee_info.FpSpillMask());
 }
 
+template <StackVisitor::CountTransitions kCount>
 void StackVisitor::WalkStack(bool include_transitions) {
   DCHECK(thread_ == Thread::Current() || thread_->IsSuspended());
   CHECK_EQ(cur_depth_, 0U);
@@ -842,8 +837,9 @@
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
           if (reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()) == return_pc) {
+            CHECK_LT(instrumentation_stack_depth, thread_->GetInstrumentationStack()->size());
             const instrumentation::InstrumentationStackFrame& instrumentation_frame =
-                GetInstrumentationStackFrame(thread_, instrumentation_stack_depth);
+                thread_->GetInstrumentationStack()->at(instrumentation_stack_depth);
             instrumentation_stack_depth++;
             if (GetMethod() ==
                 Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
@@ -907,13 +903,18 @@
         return;
       }
     }
-    cur_depth_++;
+    if (kCount == CountTransitions::kYes) {
+      cur_depth_++;
+    }
   }
   if (num_frames_ != 0) {
     CHECK_EQ(cur_depth_, num_frames_);
   }
 }
 
+template void StackVisitor::WalkStack<StackVisitor::CountTransitions::kYes>(bool);
+template void StackVisitor::WalkStack<StackVisitor::CountTransitions::kNo>(bool);
+
 void JavaFrameRootInfo::Describe(std::ostream& os) const {
   const StackVisitor* visitor = stack_visitor_;
   CHECK(visitor != nullptr);
diff --git a/runtime/stack.h b/runtime/stack.h
index d02e4b7..b1e99e5 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -595,6 +595,12 @@
   // Return 'true' if we should continue to visit more frames, 'false' to stop.
   virtual bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
+  enum class CountTransitions {
+    kYes,
+    kNo,
+  };
+
+  template <CountTransitions kCount = CountTransitions::kYes>
   void WalkStack(bool include_transitions = false)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 17f5513..d79bf36 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -873,6 +873,62 @@
   Dbg::DdmSendThreadNotification(this, CHUNK_TYPE("THNM"));
 }
 
+static void GetThreadStack(pthread_t thread,      // Thread whose stack is queried.
+                           void** stack_base,     // Out: lowest address of the stack.
+                           size_t* stack_size,    // Out: size of the stack.
+                           size_t* guard_size) {  // Out: guard size (see the Apple caveat below).
+#if defined(__APPLE__)
+  *stack_size = pthread_get_stacksize_np(thread);
+  void* stack_addr = pthread_get_stackaddr_np(thread);
+
+  // Check whether stack_addr is the base or end of the stack.
+  // (On Mac OS 10.7, it's the end.)
+  int stack_variable;  // Address probe. NOTE(review): presumably 'thread' is the caller - confirm.
+  if (stack_addr > &stack_variable) {
+    *stack_base = reinterpret_cast<uint8_t*>(stack_addr) - *stack_size;
+  } else {
+    *stack_base = stack_addr;
+  }
+  // This is wrong (it reads the guard size of freshly initialized attributes, not of 'thread'),
+  // but there doesn't seem to be a way to get the actual value on the Mac.
+  pthread_attr_t attributes;
+  CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
+#else  // !defined(__APPLE__)
+  pthread_attr_t attributes;
+  CHECK_PTHREAD_CALL(pthread_getattr_np, (thread, &attributes), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_attr_getstack, (&attributes, stack_base, stack_size), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
+  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
+
+#if defined(__GLIBC__)
+  // If we're the main thread, check whether we were run with an unlimited stack. In that case,
+  // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
+  // will be broken because we'll die long before we get close to 2GB.
+  bool is_main_thread = (::art::GetTid() == getpid());
+  if (is_main_thread) {
+    rlimit stack_limit;
+    if (getrlimit(RLIMIT_STACK, &stack_limit) == -1) {
+      PLOG(FATAL) << "getrlimit(RLIMIT_STACK) failed";
+    }
+    if (stack_limit.rlim_cur == RLIM_INFINITY) {
+      size_t old_stack_size = *stack_size;
+
+      // Use the kernel default limit as our size, and adjust the base to match.
+      *stack_size = 8 * MB;
+      *stack_base = reinterpret_cast<uint8_t*>(*stack_base) + (old_stack_size - *stack_size);
+
+      VLOG(threads) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
+                    << " to " << PrettySize(*stack_size)
+                    << " with base " << *stack_base;
+    }
+  }
+#endif  // defined(__GLIBC__)
+
+#endif  // defined(__APPLE__)
+}
+
 bool Thread::InitStackHwm() {
   void* read_stack_base;
   size_t read_stack_size;
@@ -1322,6 +1378,32 @@
   VLOG(threads) << this << " self-reviving";
 }
 
+// Returns the scheduler group name of thread 'tid' in this process, or "" if it can't be found.
+static std::string GetSchedulerGroupName(pid_t tid) {
+  // Lines look like "1:cpuacct,cpu:/"; we want field 3 of the line whose field 2 lists "cpu".
+  std::string cgroup_file;
+  if (!ReadFileToString(StringPrintf("/proc/self/task/%d/cgroup", tid), &cgroup_file)) {
+    return "";
+  }
+  std::vector<std::string> cgroup_lines;
+  Split(cgroup_file, '\n', &cgroup_lines);
+  for (const std::string& cgroup_line : cgroup_lines) {
+    std::vector<std::string> cgroup_fields;
+    Split(cgroup_line, ':', &cgroup_fields);
+    if (cgroup_fields.size() < 3) {
+      continue;  // Malformed line: indexing fields [1] and [2] below would be out of bounds.
+    }
+    std::vector<std::string> cgroups;
+    Split(cgroup_fields[1], ',', &cgroups);
+    for (const std::string& cgroup : cgroups) {
+      if (cgroup == "cpu") {
+        return cgroup_fields[2].substr(1);  // Skip the leading slash.
+      }
+    }
+  }
+  return "";
+}
+
 void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
   std::string group_name;
   int priority;
@@ -2802,7 +2884,7 @@
 }
 
 // RootVisitor parameters are: (const Object* obj, size_t vreg, const StackVisitor* visitor).
-template <typename RootVisitor>
+template <typename RootVisitor, bool kPrecise = false>
 class ReferenceMapVisitor : public StackVisitor {
  public:
   ReferenceMapVisitor(Thread* thread, Context* context, RootVisitor& visitor)
@@ -2889,7 +2971,9 @@
     }
   }
 
-  void VisitQuickFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
+  template <typename T>
+  ALWAYS_INLINE
+  inline void VisitQuickFrameWithVregCallback() REQUIRES_SHARED(Locks::mutator_lock_) {
     ArtMethod** cur_quick_frame = GetCurrentQuickFrame();
     DCHECK(cur_quick_frame != nullptr);
     ArtMethod* m = *cur_quick_frame;
@@ -2906,6 +2990,9 @@
       CodeInfoEncoding encoding = code_info.ExtractEncoding();
       StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
       DCHECK(map.IsValid());
+
+      T vreg_info(m, code_info, encoding, map, visitor_);
+
       // Visit stack entries that hold pointers.
       size_t number_of_bits = map.GetNumberOfStackMaskBits(encoding.stack_map_encoding);
       for (size_t i = 0; i < number_of_bits; ++i) {
@@ -2914,7 +3001,7 @@
           mirror::Object* ref = ref_addr->AsMirrorPtr();
           if (ref != nullptr) {
             mirror::Object* new_ref = ref;
-            visitor_(&new_ref, -1, this);
+            vreg_info.VisitStack(&new_ref, i, this);
             if (ref != new_ref) {
               ref_addr->Assign(new_ref);
             }
@@ -2935,13 +3022,119 @@
                        << "set in register_mask=" << register_mask << " at " << DescribeLocation();
           }
           if (*ref_addr != nullptr) {
-            visitor_(ref_addr, -1, this);
+            vreg_info.VisitRegister(ref_addr, i, this);
           }
         }
       }
     }
   }
 
+  // Visits the current quick frame with the strategy selected by the kPrecise template argument.
+  void VisitQuickFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!kPrecise) {
+      return VisitQuickFrameNonPrecise();
+    }
+    VisitQuickFramePrecise();
+  }
+
+  void VisitQuickFrameNonPrecise() REQUIRES_SHARED(Locks::mutator_lock_) {
+    struct UndefinedVRegInfo {  // Callback policy that reports every root with vreg -1.
+      UndefinedVRegInfo(ArtMethod* method ATTRIBUTE_UNUSED,
+                        const CodeInfo& code_info ATTRIBUTE_UNUSED,
+                        const CodeInfoEncoding& encoding ATTRIBUTE_UNUSED,
+                        const StackMap& map ATTRIBUTE_UNUSED,
+                        RootVisitor& _visitor)
+          : visitor(_visitor) {
+      }
+      // Stack-slot root: the slot index is ignored and the vreg is reported as -1.
+      ALWAYS_INLINE
+      void VisitStack(mirror::Object** ref,
+                      size_t stack_index ATTRIBUTE_UNUSED,
+                      const StackVisitor* stack_visitor)
+          REQUIRES_SHARED(Locks::mutator_lock_) {
+        visitor(ref, -1, stack_visitor);
+      }
+      // Register root: the register index is ignored and the vreg is reported as -1.
+      ALWAYS_INLINE
+      void VisitRegister(mirror::Object** ref,
+                         size_t register_index ATTRIBUTE_UNUSED,
+                         const StackVisitor* stack_visitor)
+          REQUIRES_SHARED(Locks::mutator_lock_) {
+        visitor(ref, -1, stack_visitor);
+      }
+      // Callback invoked for each root as (ref, vreg, stack visitor).
+      RootVisitor& visitor;
+    };
+    VisitQuickFrameWithVregCallback<UndefinedVRegInfo>();
+  }
+
+  void VisitQuickFramePrecise() REQUIRES_SHARED(Locks::mutator_lock_) {
+    struct StackMapVRegInfo {  // Callback policy that reports roots with their dex register(s).
+      StackMapVRegInfo(ArtMethod* method,
+                       const CodeInfo& _code_info,
+                       const CodeInfoEncoding& _encoding,
+                       const StackMap& map,
+                       RootVisitor& _visitor)
+          : number_of_dex_registers(method->GetCodeItem()->registers_size_),
+            code_info(_code_info),
+            encoding(_encoding),
+            dex_register_map(code_info.GetDexRegisterMapOf(map,
+                                                           encoding,
+                                                           number_of_dex_registers)),
+            visitor(_visitor) {
+      }
+      // Reports 'ref' once per dex register located at (kind, index), or once with -1 if none is.
+      // TODO: If necessary, we should consider caching a reverse map instead of the linear
+      //       lookups for each location.
+      void FindWithType(const size_t index,
+                        const DexRegisterLocation::Kind kind,
+                        mirror::Object** ref,
+                        const StackVisitor* stack_visitor)
+          REQUIRES_SHARED(Locks::mutator_lock_) {
+        bool found = false;
+        for (size_t dex_reg = 0; dex_reg != number_of_dex_registers; ++dex_reg) {
+          DexRegisterLocation location = dex_register_map.GetDexRegisterLocation(
+              dex_reg, number_of_dex_registers, code_info, encoding);
+          if (location.GetKind() == kind && static_cast<size_t>(location.GetValue()) == index) {
+            visitor(ref, dex_reg, stack_visitor);
+            found = true;
+          }
+        }
+
+        if (!found) {
+          // If nothing found, report with -1.
+          visitor(ref, -1, stack_visitor);
+        }
+      }
+      // Stack-slot root: looked up as a kInStack location at stack_index * kFrameSlotSize.
+      void VisitStack(mirror::Object** ref, size_t stack_index, const StackVisitor* stack_visitor)
+          REQUIRES_SHARED(Locks::mutator_lock_) {
+        const size_t stack_offset = stack_index * kFrameSlotSize;
+        FindWithType(stack_offset,
+                     DexRegisterLocation::Kind::kInStack,
+                     ref,
+                     stack_visitor);
+      }
+      // Register root: looked up as a kInRegister location whose value is register_index.
+      void VisitRegister(mirror::Object** ref,
+                         size_t register_index,
+                         const StackVisitor* stack_visitor)
+          REQUIRES_SHARED(Locks::mutator_lock_) {
+        FindWithType(register_index,
+                     DexRegisterLocation::Kind::kInRegister,
+                     ref,
+                     stack_visitor);
+      }
+      // Inputs for the dex register location lookups in FindWithType, plus the root callback.
+      size_t number_of_dex_registers;
+      const CodeInfo& code_info;
+      const CodeInfoEncoding& encoding;
+      DexRegisterMap dex_register_map;
+      RootVisitor& visitor;
+    };
+    VisitQuickFrameWithVregCallback<StackMapVRegInfo>();
+  }
+
   // Visitor for when we visit a root.
   RootVisitor& visitor_;
 };
@@ -2960,6 +3153,7 @@
   const uint32_t tid_;
 };
 
+template <bool kPrecise>
 void Thread::VisitRoots(RootVisitor* visitor) {
   const uint32_t thread_id = GetThreadId();
   visitor->VisitRootIfNonNull(&tlsPtr_.opeer, RootInfo(kRootThreadObject, thread_id));
@@ -2977,7 +3171,7 @@
   // Visit roots for deoptimization.
   if (tlsPtr_.stacked_shadow_frame_record != nullptr) {
     RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
+    ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, nullptr, visitor_to_callback);
     for (StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record;
          record != nullptr;
          record = record->GetLink()) {
@@ -3000,7 +3194,7 @@
   }
   if (tlsPtr_.frame_id_to_shadow_frame != nullptr) {
     RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback);
+    ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, nullptr, visitor_to_callback);
     for (FrameIdToShadowFrame* record = tlsPtr_.frame_id_to_shadow_frame;
          record != nullptr;
          record = record->GetNext()) {
@@ -3013,14 +3207,22 @@
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
   RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context, visitor_to_callback);
-  mapper.WalkStack();
+  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, context, visitor_to_callback);
+  mapper.template WalkStack<StackVisitor::CountTransitions::kNo>(false);
   ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
   }
 }
 
+void Thread::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) {
+  // Map the runtime kVisitRootFlagPrecise bit onto the matching VisitRoots<kPrecise> variant.
+  if ((flags & VisitRootFlags::kVisitRootFlagPrecise) == 0) {
+    return VisitRoots<false>(visitor);
+  }
+  VisitRoots<true>(visitor);
+}
+
 class VerifyRootVisitor : public SingleRootVisitor {
  public:
   void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
diff --git a/runtime/thread.h b/runtime/thread.h
index b80fdc7..31cd0eb 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -549,7 +549,8 @@
     return tlsPtr_.frame_id_to_shadow_frame != nullptr;
   }
 
-  void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE void VerifyStack() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -1245,6 +1246,9 @@
   // Install the protected region for implicit stack checks.
   void InstallImplicitProtection();
 
+  template <bool kPrecise>
+  void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
   static bool IsAotCompiler();
 
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows and atomic CAS to
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index a6bd83d..664eeb4 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1395,10 +1395,10 @@
   }
 }
 
-void ThreadList::VisitRoots(RootVisitor* visitor) const {
+void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
   MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (const auto& thread : list_) {
-    thread->VisitRoots(visitor);
+    thread->VisitRoots(visitor, flags);
   }
 }
 
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 1acabcb..658db00 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -155,7 +155,7 @@
                !Locks::thread_list_lock_,
                !Locks::thread_suspend_count_lock_);
 
-  void VisitRoots(RootVisitor* visitor) const
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void VisitRootsForSuspendedThreads(RootVisitor* visitor)
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 66739a9..4732f59 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -25,6 +25,8 @@
 #include <unistd.h>
 #include <memory>
 
+#include "android-base/strings.h"
+
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
@@ -139,59 +141,6 @@
   return result;
 }
 
-void GetThreadStack(pthread_t thread, void** stack_base, size_t* stack_size, size_t* guard_size) {
-#if defined(__APPLE__)
-  *stack_size = pthread_get_stacksize_np(thread);
-  void* stack_addr = pthread_get_stackaddr_np(thread);
-
-  // Check whether stack_addr is the base or end of the stack.
-  // (On Mac OS 10.7, it's the end.)
-  int stack_variable;
-  if (stack_addr > &stack_variable) {
-    *stack_base = reinterpret_cast<uint8_t*>(stack_addr) - *stack_size;
-  } else {
-    *stack_base = stack_addr;
-  }
-
-  // This is wrong, but there doesn't seem to be a way to get the actual value on the Mac.
-  pthread_attr_t attributes;
-  CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), __FUNCTION__);
-  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
-  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
-#else
-  pthread_attr_t attributes;
-  CHECK_PTHREAD_CALL(pthread_getattr_np, (thread, &attributes), __FUNCTION__);
-  CHECK_PTHREAD_CALL(pthread_attr_getstack, (&attributes, stack_base, stack_size), __FUNCTION__);
-  CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, guard_size), __FUNCTION__);
-  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), __FUNCTION__);
-
-#if defined(__GLIBC__)
-  // If we're the main thread, check whether we were run with an unlimited stack. In that case,
-  // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
-  // will be broken because we'll die long before we get close to 2GB.
-  bool is_main_thread = (::art::GetTid() == getpid());
-  if (is_main_thread) {
-    rlimit stack_limit;
-    if (getrlimit(RLIMIT_STACK, &stack_limit) == -1) {
-      PLOG(FATAL) << "getrlimit(RLIMIT_STACK) failed";
-    }
-    if (stack_limit.rlim_cur == RLIM_INFINITY) {
-      size_t old_stack_size = *stack_size;
-
-      // Use the kernel default limit as our size, and adjust the base to match.
-      *stack_size = 8 * MB;
-      *stack_base = reinterpret_cast<uint8_t*>(*stack_base) + (old_stack_size - *stack_size);
-
-      VLOG(threads) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
-                    << " to " << PrettySize(*stack_size)
-                    << " with base " << *stack_base;
-    }
-  }
-#endif
-
-#endif
-}
-
 bool ReadFileToString(const std::string& file_name, std::string* result) {
   File file(file_name, O_RDONLY, false);
   if (!file.IsOpened()) {
@@ -411,6 +360,10 @@
                       negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
+static inline constexpr bool NeedsEscaping(uint16_t ch) {
+  return !(ch >= ' ' && ch <= '~');  // True for anything outside printable ASCII [' ', '~'].
+}
+
 std::string PrintableChar(uint16_t ch) {
   std::string result;
   result += '\'';
@@ -782,67 +735,6 @@
   }
 }
 
-std::string Trim(const std::string& s) {
-  std::string result;
-  unsigned int start_index = 0;
-  unsigned int end_index = s.size() - 1;
-
-  // Skip initial whitespace.
-  while (start_index < s.size()) {
-    if (!isspace(s[start_index])) {
-      break;
-    }
-    start_index++;
-  }
-
-  // Skip terminating whitespace.
-  while (end_index >= start_index) {
-    if (!isspace(s[end_index])) {
-      break;
-    }
-    end_index--;
-  }
-
-  // All spaces, no beef.
-  if (end_index < start_index) {
-    return "";
-  }
-  // Start_index is the first non-space, end_index is the last one.
-  return s.substr(start_index, end_index - start_index + 1);
-}
-
-template <typename StringT>
-std::string Join(const std::vector<StringT>& strings, char separator) {
-  if (strings.empty()) {
-    return "";
-  }
-
-  std::string result(strings[0]);
-  for (size_t i = 1; i < strings.size(); ++i) {
-    result += separator;
-    result += strings[i];
-  }
-  return result;
-}
-
-// Explicit instantiations.
-template std::string Join<std::string>(const std::vector<std::string>& strings, char separator);
-template std::string Join<const char*>(const std::vector<const char*>& strings, char separator);
-
-bool StartsWith(const std::string& s, const char* prefix) {
-  return s.compare(0, strlen(prefix), prefix) == 0;
-}
-
-bool EndsWith(const std::string& s, const char* suffix) {
-  size_t suffix_length = strlen(suffix);
-  size_t string_length = s.size();
-  if (suffix_length > string_length) {
-    return false;
-  }
-  size_t offset = string_length - suffix_length;
-  return s.compare(offset, suffix_length, suffix) == 0;
-}
-
 void SetThreadName(const char* thread_name) {
   int hasAt = 0;
   int hasDot = 0;
@@ -892,31 +784,6 @@
   *task_cpu = strtoull(fields[36].c_str(), nullptr, 10);
 }
 
-std::string GetSchedulerGroupName(pid_t tid) {
-  // /proc/<pid>/cgroup looks like this:
-  // 2:devices:/
-  // 1:cpuacct,cpu:/
-  // We want the third field from the line whose second field contains the "cpu" token.
-  std::string cgroup_file;
-  if (!ReadFileToString(StringPrintf("/proc/self/task/%d/cgroup", tid), &cgroup_file)) {
-    return "";
-  }
-  std::vector<std::string> cgroup_lines;
-  Split(cgroup_file, '\n', &cgroup_lines);
-  for (size_t i = 0; i < cgroup_lines.size(); ++i) {
-    std::vector<std::string> cgroup_fields;
-    Split(cgroup_lines[i], ':', &cgroup_fields);
-    std::vector<std::string> cgroups;
-    Split(cgroup_fields[1], ',', &cgroups);
-    for (size_t j = 0; j < cgroups.size(); ++j) {
-      if (cgroups[j] == "cpu") {
-        return cgroup_fields[2].substr(1);  // Skip the leading slash.
-      }
-    }
-  }
-  return "";
-}
-
 const char* GetAndroidRoot() {
   const char* android_root = getenv("ANDROID_ROOT");
   if (android_root == nullptr) {
@@ -1005,7 +872,9 @@
     return false;
   }
   std::string cache_file(&location[1]);  // skip leading slash
-  if (!EndsWith(location, ".dex") && !EndsWith(location, ".art") && !EndsWith(location, ".oat")) {
+  if (!android::base::EndsWith(location, ".dex") &&
+      !android::base::EndsWith(location, ".art") &&
+      !android::base::EndsWith(location, ".oat")) {
     cache_file += "/";
     cache_file += DexFile::kClassesDex;
   }
@@ -1032,7 +901,7 @@
 }
 
 int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
-  const std::string command_line(Join(arg_vector, ' '));
+  const std::string command_line(android::base::Join(arg_vector, ' '));
   CHECK_GE(arg_vector.size(), 1U) << command_line;
 
   // Convert the args to char pointers.
@@ -1091,7 +960,7 @@
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
   int status = ExecAndReturnCode(arg_vector, error_msg);
   if (status != 0) {
-    const std::string command_line(Join(arg_vector, ' '));
+    const std::string command_line(android::base::Join(arg_vector, ' '));
     *error_msg = StringPrintf("Failed execv(%s) because non-0 exit status",
                               command_line.c_str());
     return false;
diff --git a/runtime/utils.h b/runtime/utils.h
index 1e98057..04e0dde 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -64,45 +64,12 @@
   return true;
 }
 
-// Return whether x / divisor == x * (1.0f / divisor), for every float x.
-static constexpr bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
-  // True, if the most significant bits of divisor are 0.
-  return ((divisor & 0x7fffff) == 0);
-}
-
-// Return whether x / divisor == x * (1.0 / divisor), for every double x.
-static constexpr bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
-  // True, if the most significant bits of divisor are 0.
-  return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
-}
-
 static inline uint32_t PointerToLowMemUInt32(const void* p) {
   uintptr_t intp = reinterpret_cast<uintptr_t>(p);
   DCHECK_LE(intp, 0xFFFFFFFFU);
   return intp & 0xFFFFFFFFU;
 }
 
-static inline bool NeedsEscaping(uint16_t ch) {
-  return (ch < ' ' || ch > '~');
-}
-
-template <typename T> T SafeAbs(T value) {
-  // std::abs has undefined behavior on min limits.
-  DCHECK_NE(value, std::numeric_limits<T>::min());
-  return std::abs(value);
-}
-
-template <typename T> T AbsOrMin(T value) {
-  return (value == std::numeric_limits<T>::min())
-      ? value
-      : std::abs(value);
-}
-
-template <typename T>
-inline typename std::make_unsigned<T>::type MakeUnsigned(T x) {
-  return static_cast<typename std::make_unsigned<T>::type>(x);
-}
-
 uint8_t* DecodeBase64(const char* src, size_t* dst_size);
 
 std::string PrintableChar(uint16_t ch);
@@ -111,12 +78,6 @@
 // Java escapes are used for non-ASCII characters.
 std::string PrintableString(const char* utf8);
 
-// Tests whether 's' starts with 'prefix'.
-bool StartsWith(const std::string& s, const char* prefix);
-
-// Tests whether 's' ends with 'suffix'.
-bool EndsWith(const std::string& s, const char* suffix);
-
 // Used to implement PrettyClass, PrettyField, PrettyMethod, and PrettyTypeOf,
 // one of which is probably more useful to you.
 // Returns a human-readable equivalent of 'descriptor'. So "I" would be "int",
@@ -167,27 +128,15 @@
 // strings. Empty strings will be omitted.
 void Split(const std::string& s, char separator, std::vector<std::string>* result);
 
-// Trims whitespace off both ends of the given string.
-std::string Trim(const std::string& s);
-
-// Joins a vector of strings into a single string, using the given separator.
-template <typename StringT> std::string Join(const std::vector<StringT>& strings, char separator);
-
 // Returns the calling thread's tid. (The C libraries don't expose this.)
 pid_t GetTid();
 
 // Returns the given thread's name.
 std::string GetThreadName(pid_t tid);
 
-// Returns details of the given thread's stack.
-void GetThreadStack(pthread_t thread, void** stack_base, size_t* stack_size, size_t* guard_size);
-
 // Reads data from "/proc/self/task/${tid}/stat".
 void GetTaskStats(pid_t tid, char* state, int* utime, int* stime, int* task_cpu);
 
-// Returns the name of the scheduler group for the given thread the current process, or the empty string.
-std::string GetSchedulerGroupName(pid_t tid);
-
 // Sets the name of the current thread. The name may be truncated to an
 // implementation-defined limit.
 void SetThreadName(const char* thread_name);
@@ -251,15 +200,6 @@
   }
 };
 
-template <typename Vector>
-void Push32(Vector* buf, int32_t data) {
-  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
-  buf->push_back((data >> 16) & 0xff);
-  buf->push_back((data >> 24) & 0xff);
-}
-
 inline bool TestBitmap(size_t idx, const uint8_t* bitmap) {
   return ((bitmap[idx / kBitsPerByte] >> (idx % kBitsPerByte)) & 0x01) != 0;
 }
@@ -334,12 +274,6 @@
   return dist(rng);
 }
 
-// All of the elements from one container to another.
-template <typename Dest, typename Src>
-static void AddAll(Dest& dest, const Src& src) {
-  dest.insert(src.begin(), src.end());
-}
-
 // Return the file size in bytes or -1 if the file does not exists.
 int64_t GetFileSizeBytes(const std::string& filename);
 
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index be4d394..82d92fc 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -273,58 +273,6 @@
   EXPECT_EQ(expected, actual);
 }
 
-TEST_F(UtilsTest, Join) {
-  std::vector<std::string> strings;
-
-  strings.clear();
-  EXPECT_EQ("", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("foo");
-  EXPECT_EQ("foo", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("");
-  strings.push_back("foo");
-  EXPECT_EQ(":foo", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("foo");
-  strings.push_back("");
-  EXPECT_EQ("foo:", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("");
-  strings.push_back("foo");
-  strings.push_back("");
-  EXPECT_EQ(":foo:", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("foo");
-  strings.push_back("bar");
-  EXPECT_EQ("foo:bar", Join(strings, ':'));
-
-  strings.clear();
-  strings.push_back("foo");
-  strings.push_back("bar");
-  strings.push_back("baz");
-  EXPECT_EQ("foo:bar:baz", Join(strings, ':'));
-}
-
-TEST_F(UtilsTest, StartsWith) {
-  EXPECT_FALSE(StartsWith("foo", "bar"));
-  EXPECT_TRUE(StartsWith("foo", "foo"));
-  EXPECT_TRUE(StartsWith("food", "foo"));
-  EXPECT_FALSE(StartsWith("fo", "foo"));
-}
-
-TEST_F(UtilsTest, EndsWith) {
-  EXPECT_FALSE(EndsWith("foo", "bar"));
-  EXPECT_TRUE(EndsWith("foo", "foo"));
-  EXPECT_TRUE(EndsWith("foofoo", "foo"));
-  EXPECT_FALSE(EndsWith("oo", "foo"));
-}
-
 TEST_F(UtilsTest, GetDalvikCacheFilename) {
   std::string name;
   std::string error;
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 52be2df..be5c18b 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -19,6 +19,8 @@
 #include <stdio.h>
 #include <memory>
 
+#include "android-base/strings.h"
+
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
@@ -42,7 +44,7 @@
     MethodVerifier::FailureKind failure = MethodVerifier::VerifyClass(
         self, klass, nullptr, true, HardFailLogMode::kLogWarning, &error_msg);
 
-    if (StartsWith(descriptor, "Ljava/lang/invoke")) {
+    if (android::base::StartsWith(descriptor, "Ljava/lang/invoke")) {
       ASSERT_TRUE(failure == MethodVerifier::kSoftFailure ||
                   failure == MethodVerifier::kNoFailure) << error_msg;
 
diff --git a/test/021-string2/expected.txt b/test/021-string2/expected.txt
index a9c6eb8..f269c7c 100644
--- a/test/021-string2/expected.txt
+++ b/test/021-string2/expected.txt
@@ -1,2 +1,6 @@
 Got expected npe
 OK
+ true true true true
+ true true true true
+ true true true true
+ true true true true
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 51351e1..df0a3dd 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -92,6 +92,31 @@
 
         testCompareToAndEquals();
         testIndexOf();
+
+        String s0_0 = "\u0000";
+        String s0_1 = new String(s0_0);
+        String s0_2 = new String(new char[] { '\u0000' });
+        String s0_3 = s0_0 + "";
+        System.out.println(
+            " " + $noinline$equals(s0_0, s0_0) +
+            " " + $noinline$equals(s0_0, s0_1) +
+            " " + $noinline$equals(s0_0, s0_2) +
+            " " + $noinline$equals(s0_0, s0_3));
+        System.out.println(
+            " " + $noinline$equals(s0_1, s0_0) +
+            " " + $noinline$equals(s0_1, s0_1) +
+            " " + $noinline$equals(s0_1, s0_2) +
+            " " + $noinline$equals(s0_1, s0_3));
+        System.out.println(
+            " " + $noinline$equals(s0_2, s0_0) +
+            " " + $noinline$equals(s0_2, s0_1) +
+            " " + $noinline$equals(s0_2, s0_2) +
+            " " + $noinline$equals(s0_2, s0_3));
+        System.out.println(
+            " " + $noinline$equals(s0_3, s0_0) +
+            " " + $noinline$equals(s0_3, s0_1) +
+            " " + $noinline$equals(s0_3, s0_2) +
+            " " + $noinline$equals(s0_3, s0_3));
     }
 
     public static void testCompareToAndEquals() {
diff --git a/test/911-get-stack-trace/expected.txt b/test/911-get-stack-trace/expected.txt
index 20bab78..77c77ca 100644
--- a/test/911-get-stack-trace/expected.txt
+++ b/test/911-get-stack-trace/expected.txt
@@ -3,206 +3,206 @@
 ###################
 From top
 ---------
- getStackTrace (Ljava/lang/Thread;II)[Ljava/lang/String;
- print (Ljava/lang/Thread;II)V
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- doTest ()V
- main ([Ljava/lang/String;)V
+ getStackTrace (Ljava/lang/Thread;II)[[Ljava/lang/String; -1
+ print (Ljava/lang/Thread;II)V 0
+ printOrWait (IILMain$ControlData;)V 6
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ doTest ()V 38
+ main ([Ljava/lang/String;)V 6
 ---------
- print (Ljava/lang/Thread;II)V
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- doTest ()V
- main ([Ljava/lang/String;)V
+ print (Ljava/lang/Thread;II)V 0
+ printOrWait (IILMain$ControlData;)V 6
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ doTest ()V 42
+ main ([Ljava/lang/String;)V 6
 ---------
- getStackTrace (Ljava/lang/Thread;II)[Ljava/lang/String;
- print (Ljava/lang/Thread;II)V
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
+ getStackTrace (Ljava/lang/Thread;II)[[Ljava/lang/String; -1
+ print (Ljava/lang/Thread;II)V 0
+ printOrWait (IILMain$ControlData;)V 6
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
 ---------
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
+ printOrWait (IILMain$ControlData;)V 6
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
 From bottom
 ---------
- main ([Ljava/lang/String;)V
+ main ([Ljava/lang/String;)V 6
 ---------
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- doTest ()V
- main ([Ljava/lang/String;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ doTest ()V 65
+ main ([Ljava/lang/String;)V 6
 ---------
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
 
 ################################
 ### Other thread (suspended) ###
 ################################
 From top
 ---------
- wait ()V
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ wait ()V -1
+ printOrWait (IILMain$ControlData;)V 24
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ printOrWait (IILMain$ControlData;)V 24
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- wait ()V
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
+ wait ()V -1
+ printOrWait (IILMain$ControlData;)V 24
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
 ---------
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
 From bottom
 ---------
- run ()V
+ run ()V 4
 ---------
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
 
 ###########################
 ### Other thread (live) ###
 ###########################
 From top
 ---------
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ printOrWait (IILMain$ControlData;)V 44
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- printOrWait (IILMain$ControlData;)V
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
+ printOrWait (IILMain$ControlData;)V 44
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 2
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
 ---------
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
 From bottom
 ---------
- run ()V
+ run ()V 4
 ---------
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- run ()V
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ run ()V 4
 ---------
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
- foo (IIILMain$ControlData;)I
- baz (IIILMain$ControlData;)Ljava/lang/Object;
- bar (IIILMain$ControlData;)J
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
+ foo (IIILMain$ControlData;)I 0
+ baz (IIILMain$ControlData;)Ljava/lang/Object; 9
+ bar (IIILMain$ControlData;)J 0
diff --git a/test/911-get-stack-trace/src/Main.java b/test/911-get-stack-trace/src/Main.java
index df4501d..722bee8 100644
--- a/test/911-get-stack-trace/src/Main.java
+++ b/test/911-get-stack-trace/src/Main.java
@@ -109,13 +109,14 @@
     t.join();
   }
 
-  public static void print(String[] stack) {
+  public static void print(String[][] stack) {
     System.out.println("---------");
-    for (int i = 0; i < stack.length; i += 2) {
-      System.out.print(' ');
-      System.out.print(stack[i]);
-      System.out.print(' ');
-      System.out.println(stack[i + 1]);
+    for (String[] stackElement : stack) {
+      for (String part : stackElement) {
+        System.out.print(' ');
+        System.out.print(part);
+      }
+      System.out.println();
     }
   }
 
@@ -174,5 +175,5 @@
     volatile boolean stop = false;
   }
 
-  public static native String[] getStackTrace(Thread thread, int start, int max);
+  public static native String[][] getStackTrace(Thread thread, int start, int max);
 }
diff --git a/test/911-get-stack-trace/stack_trace.cc b/test/911-get-stack-trace/stack_trace.cc
index e7d9380..b5b5678 100644
--- a/test/911-get-stack-trace/stack_trace.cc
+++ b/test/911-get-stack-trace/stack_trace.cc
@@ -16,10 +16,13 @@
 
 #include "stack_trace.h"
 
+#include <inttypes.h>
 #include <memory>
 #include <stdio.h>
 
 #include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
 #include "ScopedLocalRef.h"
@@ -44,8 +47,7 @@
     }
   }
 
-  auto callback = [&](jint i) -> jstring {
-    size_t method_index = static_cast<size_t>(i) / 2;
+  auto callback = [&](jint method_index) -> jobjectArray {
     char* name;
     char* sig;
     char* gen;
@@ -58,12 +60,20 @@
         return nullptr;
       }
     }
-    jstring callback_result;
-    if (i % 2 == 0) {
-      callback_result = name == nullptr ? nullptr : env->NewStringUTF(name);
-    } else {
-      callback_result = sig == nullptr ? nullptr : env->NewStringUTF(sig);
-    }
+
+    auto inner_callback = [&](jint component_index) -> jstring {
+      switch (component_index) {
+        case 0:
+          return (name == nullptr) ? nullptr : env->NewStringUTF(name);
+        case 1:
+          return (sig == nullptr) ? nullptr : env->NewStringUTF(sig);
+        case 2:
+          return env->NewStringUTF(StringPrintf("%" PRId64, frames[method_index].location).c_str());
+      }
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+    };
+    jobjectArray inner_array = CreateObjectArray(env, 3, "java/lang/String", inner_callback);
 
     if (name != nullptr) {
       jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
@@ -74,9 +84,10 @@
     if (gen != nullptr) {
       jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
     }
-    return callback_result;
+
+    return inner_array;
   };
-  return CreateObjectArray(env, 2 * count, "java/lang/String", callback);
+  return CreateObjectArray(env, count, "[Ljava/lang/String;", callback);
 }
 
 // Don't do anything
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 8002cfa..e5fa53f 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -1,7 +1,7 @@
 ---
 true true
-root@root --(stack-local)--> 1@1000 [size=16, length=-1]
-root@root --(stack-local)--> 3000@0 [size=132, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=11,location= 31])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
@@ -21,12 +21,6 @@
 5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
-root@root --(stack-local)--> 1@1000 [size=16, length=-1]
-root@root --(stack-local)--> 2@1000 [size=16, length=-1]
-root@root --(stack-local)--> 3000@0 [size=132, length=-1]
-root@root --(thread)--> 2@1000 [size=16, length=-1]
-root@root --(thread)--> 3000@0 [size=132, length=-1]
-0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=132, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
@@ -46,7 +40,9 @@
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
 root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
-root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=10,location= 6])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 6])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=3,location= 18])--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
@@ -66,13 +62,6 @@
 5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
-root@root --(jni-global)--> 1@1000 [size=16, length=-1]
-root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
-root@root --(stack-local)--> 1@1000 [size=16, length=-1]
-root@root --(stack-local)--> 2@1000 [size=16, length=-1]
-root@root --(thread)--> 1@1000 [size=16, length=-1]
-root@root --(thread)--> 2@1000 [size=16, length=-1]
-root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=132, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 340671d..7b00fcd 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -269,6 +269,43 @@
       jvmtiHeapReferenceInfo info_;
     };
 
+    class StackLocalElement : public Elem {
+     public:
+      StackLocalElement(const std::string& referrer,
+                        const std::string& referree,
+                        jlong size,
+                        jint length,
+                        const jvmtiHeapReferenceInfo* reference_info)
+          : Elem(referrer, referree, size, length) {
+        memcpy(&info_, reference_info, sizeof(jvmtiHeapReferenceInfo));
+      }
+
+     protected:
+      std::string PrintArrowType() const OVERRIDE {
+        char* name = nullptr;
+        if (info_.stack_local.method != nullptr) {
+          jvmti_env->GetMethodName(info_.stack_local.method, &name, nullptr, nullptr);
+        }
+        std::string ret = StringPrintf("stack-local[id=%" PRId64 ",tag=%" PRId64 ",depth=%d,"
+                                       "method=%s,vreg=%d,location=% " PRId64 "]",
+                                       info_.stack_local.thread_id,
+                                       info_.stack_local.thread_tag,
+                                       info_.stack_local.depth,
+                                       name == nullptr ? "<null>" : name,
+                                       info_.stack_local.slot,
+                                       info_.stack_local.location);
+        if (name != nullptr) {
+          jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
+        }
+
+        return ret;
+      }
+
+     private:
+      const std::string string_;
+      jvmtiHeapReferenceInfo info_;
+    };
+
     // For simple or unimplemented cases.
     class StringElement : public Elem {
      public:
@@ -380,11 +417,11 @@
                                                          length,
                                                          "monitor"));
         case JVMTI_HEAP_REFERENCE_STACK_LOCAL:
-          return std::unique_ptr<Elem>(new StringElement(referrer,
-                                                         referree,
-                                                         size,
-                                                         length,
-                                                         "stack-local"));
+          return std::unique_ptr<Elem>(new StackLocalElement(referrer,
+                                                             referree,
+                                                             size,
+                                                             length,
+                                                             reference_info));
         case JVMTI_HEAP_REFERENCE_JNI_LOCAL:
           return std::unique_ptr<Elem>(new JNILocalElement(referrer,
                                                            referree,
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
index a6ace9a..564596e 100644
--- a/test/913-heaps/src/Main.java
+++ b/test/913-heaps/src/Main.java
@@ -85,7 +85,7 @@
     v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
 
     doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
-    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, null, v, "2@1000");
+    doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, null, v, "3@1001");
 
     tmpStorage.clear();
   }
@@ -96,7 +96,7 @@
     A a = createTree(v);
 
     doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
-    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, a, v, "2@1000");
+    doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, a, v, "3@1001");
   }
 
   private static void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 28e1e60..b515130 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -440,9 +440,11 @@
   629-vdex-speed
 
 # This test fails without an image.
+# 964 often times out due to the large number of classes it tries to compile.
 TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
   137-cfi \
-  138-duplicate-classes-check
+  138-duplicate-classes-check \
+  964-default-iface-init
 
 ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \
@@ -512,9 +514,11 @@
 # Test 906 iterates the heap filtering with different options. No instances should be created
 # between those runs to be able to have precise checks.
 # Test 902 hits races with the JIT compiler. b/32821077
+# Test 626-const-class-linking can deadlock with JIT. b/33567581
 # Test 629 requires compilation.
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
   137-cfi \
+  626-const-class-linking \
   629-vdex-speed \
   902-hello-transformation \
   904-object-allocation \