Merge "MIPS: Assemblers changes needed for optimizing compiler"
diff --git a/Android.mk b/Android.mk
index b8ba9f2..fcf70ff 100644
--- a/Android.mk
+++ b/Android.mk
@@ -89,6 +89,7 @@
 include $(art_path)/tools/Android.mk
 include $(art_path)/tools/ahat/Android.mk
 include $(art_path)/tools/dexfuzz/Android.mk
+include $(art_path)/tools/dmtracedump/Android.mk
 include $(art_path)/sigchainlib/Android.mk
 
 
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 3272c27..592843e 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -57,7 +57,7 @@
   endif
   ifeq ($(1),optimizing)
     core_compile_options += --compiler-backend=Optimizing
-    core_dex2oat_dependency += $(DEX2OAT)
+    core_dex2oat_dependency := $(DEX2OAT)
     core_infix := -optimizing
   endif
   ifeq ($(1),interpreter)
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index d5ac341..dbcc868 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -16,6 +16,11 @@
 
 #include "mir_to_lir-inl.h"
 
+// Mac does not provide endian.h, so we'll use byte order agnostic code.
+#ifndef __APPLE__
+#include <endian.h>
+#endif
+
 #include "base/bit_vector-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
@@ -841,7 +846,7 @@
         references_buffer[i] = static_cast<uint8_t>(
             raw_storage[i / sizeof(raw_storage[0])] >> (8u * (i % sizeof(raw_storage[0]))));
       }
-      native_gc_map_builder.AddEntry(native_offset, &references_buffer[0]);
+      native_gc_map_builder.AddEntry(native_offset, references_buffer.data());
       prev_mir = mir;
     }
   }
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index af93aab..e1a2838 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -73,6 +73,7 @@
     false,  // kIntrinsicUnsafeGet
     false,  // kIntrinsicUnsafePut
     true,   // kIntrinsicSystemArrayCopyCharArray
+    true,   // kIntrinsicSystemArrayCopy
 };
 static_assert(arraysize(kIntrinsicIsStatic) == kInlineOpNop,
               "arraysize of kIntrinsicIsStatic unexpected");
@@ -121,6 +122,8 @@
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopy],
+              "SystemArrayCopy must be static");
 
 MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke) {
   MIR* insn = mir_graph->NewMIR();
@@ -326,6 +329,9 @@
     // kProtoCacheCharArrayICharArrayII_V
     { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt,
         kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt} },
+    // kProtoCacheObjectIObjectII_V
+    { kClassCacheVoid, 5, {kClassCacheJavaLangObject, kClassCacheInt,
+        kClassCacheJavaLangObject, kClassCacheInt, kClassCacheInt} },
     // kProtoCacheIICharArrayI_V
     { kClassCacheVoid, 4, { kClassCacheInt, kClassCacheInt, kClassCacheJavaLangCharArray,
         kClassCacheInt } },
@@ -481,6 +487,8 @@
 
     INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray,
               0),
+    INTRINSIC(JavaLangSystem, ArrayCopy, ObjectIObjectII_V , kIntrinsicSystemArrayCopy,
+              0),
 
     INTRINSIC(JavaLangInteger, RotateRight, II_I, kIntrinsicRotateRight, k32),
     INTRINSIC(JavaLangLong, RotateRight, JI_J, kIntrinsicRotateRight, k64),
@@ -653,6 +661,7 @@
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
     case kIntrinsicRotateLeft:
+    case kIntrinsicSystemArrayCopy:
       return false;   // not implemented in quick.
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 8458806..5ce110c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -259,6 +259,7 @@
       kProtoCacheObjectJ_Object,
       kProtoCacheObjectJObject_V,
       kProtoCacheCharArrayICharArrayII_V,
+      kProtoCacheObjectIObjectII_V,
       kProtoCacheIICharArrayI_V,
       kProtoCacheByteArrayIII_String,
       kProtoCacheIICharArray_String,
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
index 3e9fb96..c425fc8 100644
--- a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
@@ -28,8 +28,8 @@
 // When we are generating the CFI code, we do not know the instuction offsets,
 // this class stores the LIR references and patches the instruction stream later.
 class LazyDebugFrameOpCodeWriter FINAL
-    : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> {
-  typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base;
+    : public DebugFrameOpCodeWriter<ArenaVector<uint8_t>> {
+  typedef DebugFrameOpCodeWriter<ArenaVector<uint8_t>> Base;
  public:
   // This method is implicitely called the by opcode writers.
   virtual void ImplicitlyAdvancePC() OVERRIDE {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f74b079..74f19a1 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -456,14 +456,6 @@
                                 type ## _ENTRYPOINT_OFFSET(4, offset)); \
     }
 
-const std::vector<uint8_t>* CompilerDriver::CreateInterpreterToInterpreterBridge() const {
-  CREATE_TRAMPOLINE(INTERPRETER, kInterpreterAbi, pInterpreterToInterpreterBridge)
-}
-
-const std::vector<uint8_t>* CompilerDriver::CreateInterpreterToCompiledCodeBridge() const {
-  CREATE_TRAMPOLINE(INTERPRETER, kInterpreterAbi, pInterpreterToCompiledCodeBridge)
-}
-
 const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const {
   CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup)
 }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 11e782f..0dc8261 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -155,10 +155,6 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateInterpreterToInterpreterBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  const std::vector<uint8_t>* CreateInterpreterToCompiledCodeBridge() const
-      SHARED_REQUIRES(Locks::mutator_lock_);
   const std::vector<uint8_t>* CreateJniDlsymLookup() const
       SHARED_REQUIRES(Locks::mutator_lock_);
   const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
index d8077d5..60241f7 100644
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -31,8 +31,10 @@
 //  * Choose the most compact encoding of a given opcode.
 //  * Keep track of current state and convert absolute values to deltas.
 //  * Divide by header-defined factors as appropriate.
-template<typename Allocator = std::allocator<uint8_t> >
-class DebugFrameOpCodeWriter : private Writer<Allocator> {
+template<typename Vector = std::vector<uint8_t> >
+class DebugFrameOpCodeWriter : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   // To save space, DWARF divides most offsets by header-defined factors.
   // They are used in integer divisions, so we make them constants.
@@ -288,11 +290,12 @@
 
   void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugFrameOpCodeWriter(bool enabled = true,
-                         const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&opcodes_),
+                         const typename Vector::allocator_type& alloc =
+                             typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
         enabled_(enabled),
         opcodes_(alloc),
         current_cfa_offset_(0),
@@ -318,7 +321,7 @@
   }
 
   bool enabled_;  // If disabled all writes are no-ops.
-  std::vector<uint8_t, Allocator> opcodes_;
+  Vector opcodes_;
   int current_cfa_offset_;
   int current_pc_;
   bool uses_dwarf3_features_;
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index f5b9ca5..d9b367b 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -29,9 +29,11 @@
 
 // 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
 // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
-template< typename Allocator >
+template <typename Vector>
 struct FNVHash {
-  size_t operator()(const std::vector<uint8_t, Allocator>& v) const {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
+  size_t operator()(const Vector& v) const {
     uint32_t hash = 2166136261u;
     for (size_t i = 0; i < v.size(); i++) {
       hash = (hash ^ v[i]) * 16777619u;
@@ -52,8 +54,10 @@
  *     EndTag();
  *   EndTag();
  */
-template< typename Allocator = std::allocator<uint8_t> >
-class DebugInfoEntryWriter FINAL : private Writer<Allocator> {
+template <typename Vector = std::vector<uint8_t>>
+class DebugInfoEntryWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   // Start debugging information entry.
   void StartTag(Tag tag, Children children) {
@@ -176,12 +180,13 @@
     return patch_locations_;
   }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugInfoEntryWriter(bool is64bitArch,
-                       std::vector<uint8_t, Allocator>* debug_abbrev,
-                       const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&entries_),
+                       Vector* debug_abbrev,
+                       const typename Vector::allocator_type& alloc =
+                           typename Vector::allocator_type())
+      : Writer<Vector>(&entries_),
         debug_abbrev_(debug_abbrev),
         current_abbrev_(alloc),
         abbrev_codes_(alloc),
@@ -221,7 +226,7 @@
                                                   NextAbbrevCode()));
     int abbrev_code = it.first->second;
     if (UNLIKELY(it.second)) {  // Inserted new entry.
-      const std::vector<uint8_t, Allocator>& abbrev = it.first->first;
+      const Vector& abbrev = it.first->first;
       debug_abbrev_.Pop();  // Remove abbrev table terminator.
       debug_abbrev_.PushUleb128(abbrev_code);
       debug_abbrev_.PushData(abbrev.data(), abbrev.size());
@@ -234,13 +239,13 @@
 
  private:
   // Fields for writing and deduplication of abbrevs.
-  Writer<Allocator> debug_abbrev_;
-  std::vector<uint8_t, Allocator> current_abbrev_;
-  std::unordered_map<std::vector<uint8_t, Allocator>, int,
-                     FNVHash<Allocator> > abbrev_codes_;
+  Writer<Vector> debug_abbrev_;
+  Vector current_abbrev_;
+  std::unordered_map<Vector, int,
+                     FNVHash<Vector> > abbrev_codes_;
 
   // Fields for writing of debugging information entries.
-  std::vector<uint8_t, Allocator> entries_;
+  Vector entries_;
   bool is64bit_;
   int depth_ = 0;
   size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h
index bdc25e4..201f0b4 100644
--- a/compiler/dwarf/debug_line_opcode_writer.h
+++ b/compiler/dwarf/debug_line_opcode_writer.h
@@ -30,8 +30,10 @@
 //  * Choose the most compact encoding of a given opcode.
 //  * Keep track of current state and convert absolute values to deltas.
 //  * Divide by header-defined factors as appropriate.
-template<typename Allocator = std::allocator<uint8_t>>
-class DebugLineOpCodeWriter FINAL : private Writer<Allocator> {
+template<typename Vector = std::vector<uint8_t>>
+class DebugLineOpCodeWriter FINAL : private Writer<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   static constexpr int kOpcodeBase = 13;
   static constexpr bool kDefaultIsStmt = true;
@@ -212,12 +214,13 @@
     return patch_locations_;
   }
 
-  using Writer<Allocator>::data;
+  using Writer<Vector>::data;
 
   DebugLineOpCodeWriter(bool use64bitAddress,
                         int codeFactorBits,
-                        const Allocator& alloc = Allocator())
-      : Writer<Allocator>(&opcodes_),
+                        const typename Vector::allocator_type& alloc =
+                            typename Vector::allocator_type())
+      : Writer<Vector>(&opcodes_),
         opcodes_(alloc),
         uses_dwarf3_features_(false),
         use_64bit_address_(use64bitAddress),
@@ -234,7 +237,7 @@
     return offset >> code_factor_bits_;
   }
 
-  std::vector<uint8_t, Allocator> opcodes_;
+  Vector opcodes_;
   bool uses_dwarf3_features_;
   bool use_64bit_address_;
   int code_factor_bits_;
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index ae57755..b7eff19 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -36,21 +36,23 @@
 // In particular, it is not related to machine architecture.
 
 // Write common information entry (CIE) to .debug_frame or .eh_frame section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugFrameCIE(bool is64bit,
                         ExceptionHeaderValueApplication address_type,
                         Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Allocator>& opcodes,
+                        const DebugFrameOpCodeWriter<Vector>& opcodes,
                         CFIFormat format,
                         std::vector<uint8_t>* debug_frame) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_frame);
   size_t cie_header_start_ = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
   writer.PushUint8(1);   // Version.
   writer.PushString("zR");
-  writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor);
-  writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor);
+  writer.PushUleb128(DebugFrameOpCodeWriter<Vector>::kCodeAlignmentFactor);
+  writer.PushSleb128(DebugFrameOpCodeWriter<Vector>::kDataAlignmentFactor);
   writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
   writer.PushUleb128(1);  // z: Augmentation data size.
   if (is64bit) {
@@ -74,13 +76,15 @@
 }
 
 // Write frame description entry (FDE) to .debug_frame or .eh_frame section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
                         uint64_t initial_address, uint64_t address_range,
-                        const std::vector<uint8_t, Allocator>* opcodes,
+                        const Vector* opcodes,
                         CFIFormat format,
                         std::vector<uint8_t>* debug_frame,
                         std::vector<uintptr_t>* debug_frame_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_frame);
   size_t fde_header_start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
@@ -107,11 +111,13 @@
 }
 
 // Write compilation unit (CU) to .debug_info section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
-                      const DebugInfoEntryWriter<Allocator>& entries,
+                      const DebugInfoEntryWriter<Vector>& entries,
                       std::vector<uint8_t>* debug_info,
                       std::vector<uintptr_t>* debug_info_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_info);
   size_t start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
@@ -135,12 +141,14 @@
 };
 
 // Write line table to .debug_line section.
-template<typename Allocator>
+template<typename Vector>
 void WriteDebugLineTable(const std::vector<std::string>& include_directories,
                          const std::vector<FileEntry>& files,
-                         const DebugLineOpCodeWriter<Allocator>& opcodes,
+                         const DebugLineOpCodeWriter<Vector>& opcodes,
                          std::vector<uint8_t>* debug_line,
                          std::vector<uintptr_t>* debug_line_patches) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
   Writer<> writer(debug_line);
   size_t header_start = writer.data()->size();
   writer.PushUint32(0);  // Section-length placeholder.
@@ -151,13 +159,13 @@
   size_t header_length_pos = writer.data()->size();
   writer.PushUint32(0);  // Header-length placeholder.
   writer.PushUint8(1 << opcodes.GetCodeFactorBits());
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0);
-  writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase);
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange);
-  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase);
-  static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = {
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kDefaultIsStmt ? 1 : 0);
+  writer.PushInt8(DebugLineOpCodeWriter<Vector>::kLineBase);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kLineRange);
+  writer.PushUint8(DebugLineOpCodeWriter<Vector>::kOpcodeBase);
+  static const int opcode_lengths[DebugLineOpCodeWriter<Vector>::kOpcodeBase] = {
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
-  for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) {
+  for (int i = 1; i < DebugLineOpCodeWriter<Vector>::kOpcodeBase; i++) {
     writer.PushUint8(opcode_lengths[i]);
   }
   for (const std::string& directory : include_directories) {
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index e703aee..42c32c4 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -26,8 +26,10 @@
 namespace dwarf {
 
 // The base class for all DWARF writers.
-template<typename Allocator = std::allocator<uint8_t>>
+template <typename Vector = std::vector<uint8_t>>
 class Writer {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
   void PushUint8(int value) {
     DCHECK_GE(value, 0);
@@ -116,8 +118,9 @@
     data_->insert(data_->end(), p, p + size);
   }
 
-  template<typename Allocator2>
-  void PushData(const std::vector<uint8_t, Allocator2>* buffer) {
+  template<typename Vector2>
+  void PushData(const Vector2* buffer) {
+    static_assert(std::is_same<typename Vector2::value_type, uint8_t>::value, "Invalid value type");
     data_->insert(data_->end(), buffer->begin(), buffer->end());
   }
 
@@ -155,14 +158,14 @@
     data_->resize(RoundUp(data_->size(), alignment), 0);
   }
 
-  const std::vector<uint8_t, Allocator>* data() const {
+  const Vector* data() const {
     return data_;
   }
 
-  explicit Writer(std::vector<uint8_t, Allocator>* buffer) : data_(buffer) { }
+  explicit Writer(Vector* buffer) : data_(buffer) { }
 
  private:
-  std::vector<uint8_t, Allocator>* data_;
+  Vector* const data_;
 
   DISALLOW_COPY_AND_ASSIGN(Writer);
 };
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
index 45e3fc5..2ef7f1a 100644
--- a/compiler/gc_map_builder.h
+++ b/compiler/gc_map_builder.h
@@ -26,14 +26,16 @@
 
 class GcMapBuilder {
  public:
-  template <typename Alloc>
-  GcMapBuilder(std::vector<uint8_t, Alloc>* table, size_t entries, uint32_t max_native_offset,
+  template <typename Vector>
+  GcMapBuilder(Vector* table, size_t entries, uint32_t max_native_offset,
                size_t references_width)
       : entries_(entries), references_width_(entries != 0u ? references_width : 0u),
         native_offset_width_(entries != 0 && max_native_offset != 0
                              ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u
                              : 0u),
         in_use_(entries) {
+    static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
     // Resize table and set up header.
     table->resize((EntryWidth() * entries) + sizeof(uint32_t));
     table_ = table->data();
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d9f8fcb..4310be6 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -55,6 +55,7 @@
 #include "mirror/string-inl.h"
 #include "oat.h"
 #include "oat_file.h"
+#include "oat_file_manager.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
@@ -126,8 +127,6 @@
                         const std::string& oat_location) {
   CHECK(!image_filename.empty());
 
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-
   std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str()));
   if (oat_file.get() == nullptr) {
     PLOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location;
@@ -141,7 +140,8 @@
     oat_file->Erase();
     return false;
   }
-  CHECK_EQ(class_linker->RegisterOatFile(oat_file_), oat_file_);
+  Runtime::Current()->GetOatFileManager().RegisterOatFile(
+      std::unique_ptr<const OatFile>(oat_file_));
 
   interpreter_to_interpreter_bridge_offset_ =
       oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset();
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index a3e889f..5f4f760 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -110,8 +110,9 @@
       (static_cast<uint32_t>(addr[3]) << 8);
 }
 
-template <typename Alloc>
-uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
 }
 
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 2d474c2..006d6fb 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -37,8 +37,8 @@
   void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
   static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
 
-  template <typename Alloc>
-  static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset);
+  template <typename Vector>
+  static uint32_t GetInsn32(Vector* code, uint32_t offset);
 
   // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
   static constexpr int32_t kPcDisplacement = 4;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 4ddd457..7b410bf 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1091,8 +1091,6 @@
       field.reset(compiler_driver_->Create ## fn_name()); \
       offset += field->size();
 
-    DO_TRAMPOLINE(interpreter_to_interpreter_bridge_, InterpreterToInterpreterBridge);
-    DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_, InterpreterToCompiledCodeBridge);
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_, QuickGenericJniTrampoline);
     DO_TRAMPOLINE(quick_imt_conflict_trampoline_, QuickImtConflictTrampoline);
@@ -1350,8 +1348,6 @@
         DCHECK_OFFSET(); \
       } while (false)
 
-    DO_TRAMPOLINE(interpreter_to_interpreter_bridge_);
-    DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_);
     DO_TRAMPOLINE(jni_dlsym_lookup_);
     DO_TRAMPOLINE(quick_generic_jni_trampoline_);
     DO_TRAMPOLINE(quick_imt_conflict_trampoline_);
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 760fb7c..48fbc0b 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -285,8 +285,6 @@
   OatHeader* oat_header_;
   std::vector<OatDexFile*> oat_dex_files_;
   std::vector<OatClass*> oat_classes_;
-  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_interpreter_bridge_;
-  std::unique_ptr<const std::vector<uint8_t>> interpreter_to_compiled_code_bridge_;
   std::unique_ptr<const std::vector<uint8_t>> jni_dlsym_lookup_;
   std::unique_ptr<const std::vector<uint8_t>> quick_generic_jni_trampoline_;
   std::unique_ptr<const std::vector<uint8_t>> quick_imt_conflict_trampoline_;
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index b0e83b0..5b34687 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -42,8 +42,8 @@
 // successor and the successor can only be reached from them.
 static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
   if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessor(0);
-  HBasicBlock* succ2 = block2->GetSuccessor(0);
+  HBasicBlock* succ1 = block1->GetSuccessors()[0];
+  HBasicBlock* succ2 = block2->GetSuccessors()[0];
   return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
 }
 
@@ -108,7 +108,7 @@
   if (!BlocksDoMergeTogether(true_block, false_block)) {
     return;
   }
-  HBasicBlock* merge_block = true_block->GetSuccessor(0);
+  HBasicBlock* merge_block = true_block->GetSuccessors()[0];
   if (!merge_block->HasSinglePhi()) {
     return;
   }
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 960f4d9..bcc3240 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -797,8 +797,8 @@
     HBasicBlock* new_pre_header = header->GetDominator();
     DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
     HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessor(0);  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessor(1);  // False successor.
+    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
 
     dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
     deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
@@ -845,14 +845,14 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
     HGraph* graph = header->GetGraph();
     HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
     if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessor(1));
+      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
     }
 
     HIntConstant* const_instr = graph->GetIntConstant(constant);
@@ -926,7 +926,7 @@
     DCHECK(header->IsLoopHeader());
     HBasicBlock* pre_header = header->GetDominator();
     if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessor(0) == pre_header);
+      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
     } else {
       DCHECK(deopt_block == pre_header);
     }
@@ -965,7 +965,8 @@
             suspend_check->GetEnvironment(), header);
       }
 
-      HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array);
+      HArrayLength* new_array_length
+          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
       deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
 
       if (loop_entry_test_block_added) {
@@ -1145,7 +1146,6 @@
       return nullptr;
     }
     uint32_t block_id = basic_block->GetBlockId();
-    DCHECK_LT(block_id, maps_.size());
     return &maps_[block_id];
   }
 
@@ -1495,10 +1495,10 @@
     // Start with input 1. Input 0 is from the incoming block.
     HInstruction* input1 = phi->InputAt(1);
     DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-        *phi->GetBlock()->GetPredecessor(1)));
+        *phi->GetBlock()->GetPredecessors()[1]));
     for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
       DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
-          *phi->GetBlock()->GetPredecessor(i)));
+          *phi->GetBlock()->GetPredecessors()[i]));
       if (input1 != phi->InputAt(i)) {
         return false;
       }
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 08e1e36..c9afdf2 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -71,9 +71,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -91,7 +91,7 @@
   HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(block2);
   HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check2 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
   HArraySet* array_set = new (&allocator_) HArraySet(
@@ -104,7 +104,7 @@
   HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(block3);
   null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   cmp = new (&allocator_) HLessThan(parameter2, array_length);
   if_inst = new (&allocator_) HIf(cmp);
   block3->AddInstruction(null_check);
@@ -115,7 +115,7 @@
   HBasicBlock* block4 = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(block4);
   null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check4 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
   array_set = new (&allocator_) HArraySet(
@@ -128,7 +128,7 @@
   HBasicBlock* block5 = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(block5);
   null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check5 = new (&allocator_)
       HBoundsCheck(parameter2, array_length, 0);
   array_set = new (&allocator_) HArraySet(
@@ -168,9 +168,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -190,7 +190,7 @@
   graph_->AddBlock(block2);
   HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, parameter2, constant_max_int);
   HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* cmp2 = new (&allocator_) HGreaterThanOrEqual(add, array_length);
   if_inst = new (&allocator_) HIf(cmp2);
   block2->AddInstruction(add);
@@ -232,9 +232,9 @@
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
   HInstruction* parameter1 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimNot);  // array
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);  // array
   HInstruction* parameter2 = new (&allocator_)
-      HParameterValue(0, Primitive::kPrimInt);  // i
+      HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);  // i
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
 
@@ -245,7 +245,7 @@
   HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(block1);
   HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0);
-  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(parameter2, array_length);
   HIf* if_inst = new (&allocator_) HIf(cmp);
   block1->AddInstruction(null_check);
@@ -295,7 +295,8 @@
   HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_5 = graph_->GetIntConstant(5);
@@ -308,7 +309,7 @@
   entry->AddSuccessor(block);
 
   HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0);
-  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check6 = new (&allocator_)
       HBoundsCheck(constant_6, array_length, 0);
   HInstruction* array_set = new (&allocator_) HArraySet(
@@ -319,7 +320,7 @@
   block->AddInstruction(array_set);
 
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check5 = new (&allocator_)
       HBoundsCheck(constant_5, array_length, 0);
   array_set = new (&allocator_) HArraySet(
@@ -330,7 +331,7 @@
   block->AddInstruction(array_set);
 
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check4 = new (&allocator_)
       HBoundsCheck(constant_4, array_length, 0);
   array_set = new (&allocator_) HArraySet(
@@ -363,7 +364,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -389,7 +391,7 @@
 
   HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
   HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
-  HInstruction* array_length = new (allocator) HArrayLength(null_check);
+  HInstruction* array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* cmp = nullptr;
   if (cond == kCondGE) {
     cmp = new (allocator) HGreaterThanOrEqual(phi, array_length);
@@ -406,7 +408,7 @@
   phi->AddInput(constant_initial);
 
   null_check = new (allocator) HNullCheck(parameter, 0);
-  array_length = new (allocator) HArrayLength(null_check);
+  array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
       null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
@@ -477,7 +479,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -489,7 +492,7 @@
   graph->AddBlock(block);
   entry->AddSuccessor(block);
   HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
-  HInstruction* array_length = new (allocator) HArrayLength(null_check);
+  HInstruction* array_length = new (allocator) HArrayLength(null_check, 0);
   block->AddInstruction(null_check);
   block->AddInstruction(array_length);
   block->AddInstruction(new (allocator) HGoto());
@@ -522,7 +525,7 @@
 
   HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1);
   null_check = new (allocator) HNullCheck(parameter, 0);
-  array_length = new (allocator) HArrayLength(null_check);
+  array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* bounds_check = new (allocator) HBoundsCheck(add, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
       null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
@@ -631,7 +634,7 @@
   phi->AddInput(constant_initial);
 
   HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0);
-  HArrayLength* array_length = new (allocator) HArrayLength(null_check);
+  HArrayLength* array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
   HInstruction* array_set = new (allocator) HArraySet(
       null_check, bounds_check, constant_10, Primitive::kPrimInt, 0);
@@ -689,7 +692,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_initial = graph->GetIntConstant(initial);
@@ -716,7 +720,7 @@
 
   HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
   HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
-  HInstruction* array_length = new (allocator) HArrayLength(null_check);
+  HInstruction* array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* cmp = nullptr;
   if (cond == kCondGE) {
     cmp = new (allocator) HGreaterThanOrEqual(phi, array_length);
@@ -732,7 +736,7 @@
   phi->AddInput(constant_initial);
 
   null_check = new (allocator) HNullCheck(parameter, 0);
-  array_length = new (allocator) HArrayLength(null_check);
+  array_length = new (allocator) HArrayLength(null_check, 0);
   HInstruction* sub = new (allocator) HSub(Primitive::kPrimInt, array_length, phi);
   HInstruction* add_minus_1 = new (allocator)
       HAdd(Primitive::kPrimInt, sub, constant_minus_1);
@@ -791,7 +795,8 @@
   HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(entry);
   graph_->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HInstruction* constant_0 = graph_->GetIntConstant(0);
@@ -811,7 +816,7 @@
   graph_->AddBlock(outer_header);
   HPhi* phi_i = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
   HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0);
-  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check);
+  HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
   HAdd* add = new (&allocator_) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
   HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi_i, add);
   HIf* if_inst = new (&allocator_) HIf(cmp);
@@ -827,7 +832,7 @@
   graph_->AddBlock(inner_header);
   HPhi* phi_j = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HSub* sub = new (&allocator_) HSub(Primitive::kPrimInt, array_length, phi_i);
   add = new (&allocator_) HAdd(Primitive::kPrimInt, sub, constant_minus_1);
   cmp = new (&allocator_) HGreaterThanOrEqual(phi_j, add);
@@ -844,20 +849,20 @@
   HBasicBlock* inner_body_compare = new (&allocator_) HBasicBlock(graph_);
   graph_->AddBlock(inner_body_compare);
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check1 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
   HArrayGet* array_get_j = new (&allocator_)
-      HArrayGet(null_check, bounds_check1, Primitive::kPrimInt);
+      HArrayGet(null_check, bounds_check1, Primitive::kPrimInt, 0);
   inner_body_compare->AddInstruction(null_check);
   inner_body_compare->AddInstruction(array_length);
   inner_body_compare->AddInstruction(bounds_check1);
   inner_body_compare->AddInstruction(array_get_j);
   HInstruction* j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1);
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HBoundsCheck* bounds_check2 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
   HArrayGet* array_get_j_plus_1 = new (&allocator_)
-      HArrayGet(null_check, bounds_check2, Primitive::kPrimInt);
+      HArrayGet(null_check, bounds_check2, Primitive::kPrimInt, 0);
   cmp = new (&allocator_) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1);
   if_inst = new (&allocator_) HIf(cmp);
   inner_body_compare->AddInstruction(j_plus_1);
@@ -873,10 +878,10 @@
   j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1);
   // temp = array[j+1]
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* bounds_check3 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
   array_get_j_plus_1 = new (&allocator_)
-      HArrayGet(null_check, bounds_check3, Primitive::kPrimInt);
+      HArrayGet(null_check, bounds_check3, Primitive::kPrimInt, 0);
   inner_body_swap->AddInstruction(j_plus_1);
   inner_body_swap->AddInstruction(null_check);
   inner_body_swap->AddInstruction(array_length);
@@ -884,16 +889,16 @@
   inner_body_swap->AddInstruction(array_get_j_plus_1);
   // array[j+1] = array[j]
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* bounds_check4 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
   array_get_j = new (&allocator_)
-      HArrayGet(null_check, bounds_check4, Primitive::kPrimInt);
+      HArrayGet(null_check, bounds_check4, Primitive::kPrimInt, 0);
   inner_body_swap->AddInstruction(null_check);
   inner_body_swap->AddInstruction(array_length);
   inner_body_swap->AddInstruction(bounds_check4);
   inner_body_swap->AddInstruction(array_get_j);
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* bounds_check5 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0);
   HArraySet* array_set_j_plus_1 = new (&allocator_)
       HArraySet(null_check, bounds_check5, array_get_j, Primitive::kPrimInt, 0);
@@ -903,7 +908,7 @@
   inner_body_swap->AddInstruction(array_set_j_plus_1);
   // array[j] = temp
   null_check = new (&allocator_) HNullCheck(parameter, 0);
-  array_length = new (&allocator_) HArrayLength(null_check);
+  array_length = new (&allocator_) HArrayLength(null_check, 0);
   HInstruction* bounds_check6 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0);
   HArraySet* array_set_j = new (&allocator_)
       HArraySet(null_check, bounds_check6, array_get_j_plus_1, Primitive::kPrimInt, 0);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 5acc5fd..5dd5be3 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -159,9 +159,13 @@
   int locals_index = locals_.size() - number_of_parameters;
   int parameter_index = 0;
 
+  const DexFile::MethodId& referrer_method_id =
+      dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
   if (!dex_compilation_unit_->IsStatic()) {
     // Add the implicit 'this' argument, not expressed in the signature.
-    HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++,
+    HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_,
+                                                              referrer_method_id.class_idx_,
+                                                              parameter_index++,
                                                               Primitive::kPrimNot,
                                                               true);
     entry_block_->AddInstruction(parameter);
@@ -170,11 +174,16 @@
     number_of_parameters--;
   }
 
-  uint32_t pos = 1;
-  for (int i = 0; i < number_of_parameters; i++) {
-    HParameterValue* parameter = new (arena_) HParameterValue(parameter_index++,
-                                                              Primitive::GetType(shorty[pos++]),
-                                                              false);
+  const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+  const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+  for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
+    HParameterValue* parameter = new (arena_) HParameterValue(
+        *dex_file_,
+        arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
+        parameter_index++,
+        Primitive::GetType(shorty[shorty_pos]),
+        false);
+    ++shorty_pos;
     entry_block_->AddInstruction(parameter);
     HLocal* local = GetLocalAt(locals_index++);
     // Store the parameter value in the local that the dex code will use
@@ -375,7 +384,7 @@
   // We do not split each edge separately, but rather create one boundary block
   // that all predecessors are relinked to. This preserves loop headers (b/23895756).
   for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
     for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
       if (GetTryItem(predecessor, try_block_info) != entry.second) {
         // Found a predecessor not covered by the same TryItem. Insert entering
@@ -392,10 +401,10 @@
   // Do a second pass over the try blocks and insert exit TryBoundaries where
   // the successor is not in the same TryItem.
   for (auto entry : try_block_info) {
-    HBasicBlock* try_block = graph_->GetBlock(entry.first);
+    HBasicBlock* try_block = graph_->GetBlocks()[entry.first];
     // NOTE: Do not use iterators because SplitEdge would invalidate them.
     for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
-      HBasicBlock* successor = try_block->GetSuccessor(i);
+      HBasicBlock* successor = try_block->GetSuccessors()[i];
 
       // If the successor is a try block, all of its predecessors must be
       // covered by the same TryItem. Otherwise the previous pass would have
@@ -581,7 +590,6 @@
 
 HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t dex_pc) const {
   DCHECK_GE(dex_pc, 0);
-  DCHECK_LT(static_cast<size_t>(dex_pc), branch_targets_.size());
   return branch_targets_[dex_pc];
 }
 
@@ -1176,10 +1184,9 @@
         verified_method->GetStringInitPcRegMap();
     auto map_it = string_init_map.find(dex_pc);
     if (map_it != string_init_map.end()) {
-      std::set<uint32_t> reg_set = map_it->second;
-      for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+      for (uint32_t reg : map_it->second) {
         HInstruction* load_local = LoadLocal(original_dex_register, Primitive::kPrimNot, dex_pc);
-        UpdateLocal(*set_it, load_local, dex_pc);
+        UpdateLocal(reg, load_local, dex_pc);
       }
     }
   } else {
@@ -1303,7 +1310,13 @@
       soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
-  return outer_class.Get() == cls.Get();
+  // GetOutermostCompilingClass returns null when the class is unresolved
+  // (e.g. if it derives from an unresolved class). This is unexpected, given
+  // that we are compiling it. When this happens we cannot establish a direct
+  // relation between the current class and the outer class, so we return false;
+  // the null check below stops two null classes from comparing equal.
+  // (Note that this is only used for optimizing invokes and field accesses.)
+  return (cls.Get() != nullptr) && (outer_class.Get() == cls.Get());
 }
 
 void HGraphBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
@@ -2877,7 +2890,6 @@
 }  // NOLINT(readability/fn_size)
 
 HLocal* HGraphBuilder::GetLocalAt(uint32_t register_index) const {
-  DCHECK_LT(register_index, locals_.size());
   return locals_[register_index];
 }
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 00f316c..2897006 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -155,7 +155,6 @@
 }
 
 bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
-  DCHECK_LT(current_block_index_, block_order_->size());
   DCHECK_EQ((*block_order_)[current_block_index_], current);
   return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
 }
@@ -172,7 +171,7 @@
 
 HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
   while (block->IsSingleJump()) {
-    block = block->GetSuccessor(0);
+    block = block->GetSuccessors()[0];
   }
   return block;
 }
@@ -547,11 +546,12 @@
           ? LocationSummary::kCallOnSlowPath
           : LocationSummary::kNoCall);
   LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
-  locations->SetInAt(0, Location::RequiresRegister());
   if (cls->NeedsAccessCheck()) {
+    locations->SetInAt(0, Location::NoLocation());
     locations->AddTemp(runtime_type_index_location);
     locations->SetOut(runtime_return_location);
   } else {
+    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetOut(Location::RequiresRegister());
   }
 }
@@ -893,7 +893,7 @@
 }
 
 void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
-  Leb128Encoder<ArenaAllocatorAdapter<uint8_t>> vmap_encoder(data);
+  Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data);
   // We currently don't use callee-saved registers.
   size_t size = 0 + 1 /* marker */ + 0;
   vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 0a36989..acce5b3 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -531,6 +531,8 @@
 
   template <typename LabelType>
   LabelType* CommonInitializeLabels() {
+    // We use raw array allocations instead of ArenaVector<> because Labels are
+    // non-constructible and non-movable and as such cannot be held in a vector.
     size_t size = GetGraph()->GetBlocks().size();
     LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size,
                                                                       kArenaAllocCodeGenerator);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 54af41d..d172fba 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -4253,7 +4253,6 @@
 }
 
 void ParallelMoveResolverARM::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4386,7 +4385,6 @@
 }
 
 void ParallelMoveResolverARM::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4477,15 +4475,18 @@
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
   if (cls->NeedsAccessCheck()) {
     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
                             cls,
                             cls->GetDexPc(),
                             nullptr);
-  } else if (cls->IsReferrersClass()) {
+    return;
+  }
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register current_method = locations->InAt(0).AsRegister<Register>();
+  if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
     __ LoadFromOffset(
@@ -5200,7 +5201,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     GenerateCompareWithImmediate(value_reg, lower_bound + i);
-    __ b(codegen_->GetLabelOf(successors.at(i)), EQ);
+    __ b(codegen_->GetLabelOf(successors[i]), EQ);
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 07758e9..c94da86 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -658,7 +658,6 @@
 }
 
 void ParallelMoveResolverARM64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid);
 }
@@ -3026,15 +3025,18 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
-  Register out = OutputRegister(cls);
-  Register current_method = InputRegisterAt(cls, 0);
   if (cls->NeedsAccessCheck()) {
     codegen_->MoveConstant(cls->GetLocations()->GetTemp(0), cls->GetTypeIndex());
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
                             cls,
                             cls->GetDexPc(),
                             nullptr);
-  } else if (cls->IsReferrersClass()) {
+    return;
+  }
+
+  Register out = OutputRegister(cls);
+  Register current_method = InputRegisterAt(cls, 0);
+  if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
     __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
@@ -3776,7 +3778,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     int32_t case_value = lower_bound + i;
-    vixl::Label* succ = codegen_->GetLabelOf(successors.at(i));
+    vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
     if (case_value == 0) {
       __ Cbz(value_reg, succ);
     } else {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 00bb505..1a08503 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -450,13 +450,11 @@
 }
 
 void ParallelMoveResolverMIPS64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType());
 }
 
 void ParallelMoveResolverMIPS64::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType());
 }
@@ -2599,15 +2597,18 @@
 
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
   if (cls->NeedsAccessCheck()) {
     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
                             cls,
                             cls->GetDexPc(),
                             nullptr);
-  } else if (cls->IsReferrersClass()) {
+    return;
+  }
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>();
+  if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
     __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
@@ -3491,7 +3492,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     int32_t case_value = lower_bound + i;
-    Label* succ = codegen_->GetLabelOf(successors.at(i));
+    Label* succ = codegen_->GetLabelOf(successors[i]);
     if (case_value == 0) {
       __ Beqzc(value_reg, succ);
     } else {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b89ca11..f8be21a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4768,7 +4768,6 @@
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4921,7 +4920,6 @@
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4998,15 +4996,18 @@
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
-  Register current_method = locations->InAt(0).AsRegister<Register>();
   if (cls->NeedsAccessCheck()) {
     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
                             cls,
                             cls->GetDexPc(),
                             nullptr);
-  } else if (cls->IsReferrersClass()) {
+    return;
+  }
+
+  Register out = locations->Out().AsRegister<Register>();
+  Register current_method = locations->InAt(0).AsRegister<Register>();
+  if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
     __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
@@ -5659,7 +5660,7 @@
     } else {
       __ cmpl(value_reg, Immediate(case_value));
     }
-    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
+    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ad6588c..21120a0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -36,9 +36,6 @@
 
 namespace x86_64 {
 
-// Some x86_64 instructions require a register to be available as temp.
-static constexpr Register TMP = R11;
-
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
 
@@ -4479,7 +4476,6 @@
 }
 
 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4638,7 +4634,6 @@
 }
 
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
@@ -4703,15 +4698,18 @@
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
   LocationSummary* locations = cls->GetLocations();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   if (cls->NeedsAccessCheck()) {
     codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInitializeTypeAndVerifyAccess),
                             cls,
                             cls->GetDexPc(),
                             nullptr);
-  } else if (cls->IsReferrersClass()) {
+    return;
+  }
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+  if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
     __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
@@ -5342,7 +5340,7 @@
     } else {
       __ cmpl(value_reg, Immediate(case_value));
     }
-    __ j(kEqual, codegen_->GetLabelOf(successors.at(i)));
+    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ecc8630..d6a6a7e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -30,6 +30,9 @@
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
 
+// Some x86_64 instructions require a register to be available as temp.
+static constexpr Register TMP = R11;
+
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
 static constexpr FloatRegister kParameterFloatRegisters[] =
     { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 5fc305c..22f227c 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -561,7 +561,7 @@
   ASSERT_FALSE(equal->NeedsMaterialization());
 
   auto hook_before_codegen = [](HGraph* graph_in) {
-    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+    HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
     HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
     block->InsertInstructionBefore(move, block->GetLastInstruction());
   };
@@ -667,7 +667,7 @@
     code_block->AddInstruction(&ret);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
@@ -733,7 +733,7 @@
     if_false_block->AddInstruction(&ret_ge);
 
     auto hook_before_codegen = [](HGraph* graph_in) {
-      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessor(0);
+      HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0];
       HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena());
       block->InsertInstructionBefore(move, block->GetLastInstruction());
     };
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 10e4bc9..b2e222f 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -51,7 +51,7 @@
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
   HConstantFolding(graph).Run();
-  SSAChecker ssa_checker_cf(&allocator, graph);
+  SSAChecker ssa_checker_cf(graph);
   ssa_checker_cf.Run();
   ASSERT_TRUE(ssa_checker_cf.IsValid());
 
@@ -63,7 +63,7 @@
   check_after_cf(graph);
 
   HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker_dce(&allocator, graph);
+  SSAChecker ssa_checker_dce(graph);
   ssa_checker_dce.Run();
   ASSERT_TRUE(ssa_checker_dce.IsValid());
 
@@ -113,7 +113,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), -1);
   };
@@ -175,7 +175,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), INT64_C(-4294967296));
   };
@@ -237,7 +237,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 3);
   };
@@ -317,7 +317,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst1 = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst1->IsIntConstant());
     ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 12);
     HInstruction* inst2 = inst1->GetPrevious();
@@ -389,7 +389,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
@@ -453,7 +453,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), 3);
   };
@@ -518,7 +518,7 @@
 
   // Check the value of the computed constant.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsLongConstant());
     ASSERT_EQ(inst->AsLongConstant()->GetValue(), 1);
   };
@@ -620,7 +620,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst1 = graph->GetBlock(4)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst1 = graph->GetBlocks()[4]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst1->IsIntConstant());
     ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 20);
     HInstruction* inst2 = inst1->GetPrevious();
@@ -710,7 +710,7 @@
 
   // Check the values of the computed constants.
   auto check_after_cf = [](HGraph* graph) {
-    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction()->InputAt(0);
+    HInstruction* inst = graph->GetBlocks()[1]->GetFirstInstruction()->InputAt(0);
     ASSERT_TRUE(inst->IsIntConstant());
     ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
   };
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 007d0e3..9754043 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -63,7 +63,7 @@
             static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value);
         if (switch_index < switch_instruction->GetNumEntries()) {
           live_successors = live_successors.SubArray(switch_index, 1u);
-          DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index));
+          DCHECK_EQ(live_successors[0], block->GetSuccessors()[switch_index]);
         } else {
           live_successors = live_successors.SubArray(switch_instruction->GetNumEntries(), 1u);
           DCHECK_EQ(live_successors[0], switch_instruction->GetDefaultBlock());
@@ -136,7 +136,7 @@
       it.Advance();
       continue;
     }
-    HBasicBlock* successor = block->GetSuccessor(0);
+    HBasicBlock* successor = block->GetSuccessors()[0];
     if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
       it.Advance();
       continue;
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index ee3a61a..cf0a4ac 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -45,7 +45,7 @@
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
   HDeadCodeElimination(graph).Run();
-  SSAChecker ssa_checker(&allocator, graph);
+  SSAChecker ssa_checker(graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
 
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 6b18650..91e4a99 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -36,16 +36,16 @@
   ASSERT_EQ(graph->GetBlocks().size(), blocks_length);
   for (size_t i = 0, e = blocks_length; i < e; ++i) {
     if (blocks[i] == kInvalidBlockId) {
-      if (graph->GetBlock(i) == nullptr) {
+      if (graph->GetBlocks()[i] == nullptr) {
         // Dead block.
       } else {
         // Only the entry block has no dominator.
-        ASSERT_EQ(nullptr, graph->GetBlock(i)->GetDominator());
-        ASSERT_TRUE(graph->GetBlock(i)->IsEntryBlock());
+        ASSERT_EQ(nullptr, graph->GetBlocks()[i]->GetDominator());
+        ASSERT_TRUE(graph->GetBlocks()[i]->IsEntryBlock());
       }
     } else {
-      ASSERT_NE(nullptr, graph->GetBlock(i)->GetDominator());
-      ASSERT_EQ(blocks[i], graph->GetBlock(i)->GetDominator()->GetBlockId());
+      ASSERT_NE(nullptr, graph->GetBlocks()[i]->GetDominator());
+      ASSERT_EQ(blocks[i], graph->GetBlocks()[i]->GetDominator()->GetBlockId());
     }
   }
 }
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 9e0d352..9b0eb70 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -118,7 +118,7 @@
                       uint32_t parent_loop_header_id,
                       const int* blocks_in_loop = nullptr,
                       size_t number_of_blocks = 0) {
-  HBasicBlock* block = graph->GetBlock(block_id);
+  HBasicBlock* block = graph->GetBlocks()[block_id];
   ASSERT_EQ(block->IsLoopHeader(), is_loop_header);
   if (parent_loop_header_id == kInvalidBlockId) {
     ASSERT_EQ(block->GetLoopInformation(), nullptr);
@@ -296,10 +296,10 @@
   TestBlock(graph, 7, false, kInvalidBlockId);  // exit block
   TestBlock(graph, 8, false, 2);                // synthesized block as pre header of inner loop
 
-  ASSERT_TRUE(graph->GetBlock(3)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(2)->GetLoopInformation()));
-  ASSERT_FALSE(graph->GetBlock(2)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(3)->GetLoopInformation()));
+  ASSERT_TRUE(graph->GetBlocks()[3]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[2]->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[2]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[3]->GetLoopInformation()));
 }
 
 TEST(FindLoopsTest, TwoLoops) {
@@ -326,10 +326,10 @@
   TestBlock(graph, 6, false, kInvalidBlockId);  // return block
   TestBlock(graph, 7, false, kInvalidBlockId);  // exit block
 
-  ASSERT_FALSE(graph->GetBlock(4)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(2)->GetLoopInformation()));
-  ASSERT_FALSE(graph->GetBlock(2)->GetLoopInformation()->IsIn(
-                    *graph->GetBlock(4)->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[4]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[2]->GetLoopInformation()));
+  ASSERT_FALSE(graph->GetBlocks()[2]->GetLoopInformation()->IsIn(
+                    *graph->GetBlocks()[4]->GetLoopInformation()));
 }
 
 TEST(FindLoopsTest, NonNaturalLoop) {
@@ -344,8 +344,8 @@
   ArenaPool arena;
   ArenaAllocator allocator(&arena);
   HGraph* graph = TestCode(data, &allocator);
-  ASSERT_TRUE(graph->GetBlock(3)->IsLoopHeader());
-  HLoopInformation* info = graph->GetBlock(3)->GetLoopInformation();
+  ASSERT_TRUE(graph->GetBlocks()[3]->IsLoopHeader());
+  HLoopInformation* info = graph->GetBlocks()[3]->GetLoopInformation();
   ASSERT_EQ(1u, info->NumberOfBackEdges());
   ASSERT_FALSE(info->GetHeader()->Dominates(info->GetBackEdges()[0]));
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4e1cafe..3de96b5 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -16,10 +16,12 @@
 
 #include "graph_checker.h"
 
+#include <algorithm>
 #include <map>
 #include <string>
 #include <sstream>
 
+#include "base/arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "base/stringprintf.h"
 
@@ -29,19 +31,21 @@
   current_block_ = block;
 
   // Check consistency with respect to predecessors of `block`.
-  std::map<HBasicBlock*, size_t> predecessors_count;
+  ArenaSafeMap<HBasicBlock*, size_t> predecessors_count(
+      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
   for (HBasicBlock* p : block->GetPredecessors()) {
-    ++predecessors_count[p];
+    auto it = predecessors_count.find(p);
+    if (it != predecessors_count.end()) {
+      ++it->second;
+    } else {
+      predecessors_count.Put(p, 1u);
+    }
   }
   for (auto& pc : predecessors_count) {
     HBasicBlock* p = pc.first;
     size_t p_count_in_block_predecessors = pc.second;
-    size_t block_count_in_p_successors = 0;
-    for (HBasicBlock* p_successor : p->GetSuccessors()) {
-      if (p_successor == block) {
-        ++block_count_in_p_successors;
-      }
-    }
+    size_t block_count_in_p_successors =
+        std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block);
     if (p_count_in_block_predecessors != block_count_in_p_successors) {
       AddError(StringPrintf(
           "Block %d lists %zu occurrences of block %d in its predecessors, whereas "
@@ -52,19 +56,21 @@
   }
 
   // Check consistency with respect to successors of `block`.
-  std::map<HBasicBlock*, size_t> successors_count;
+  ArenaSafeMap<HBasicBlock*, size_t> successors_count(
+      std::less<HBasicBlock*>(), GetGraph()->GetArena()->Adapter(kArenaAllocGraphChecker));
   for (HBasicBlock* s : block->GetSuccessors()) {
-    ++successors_count[s];
+    auto it = successors_count.find(s);
+    if (it != successors_count.end()) {
+      ++it->second;
+    } else {
+      successors_count.Put(s, 1u);
+    }
   }
   for (auto& sc : successors_count) {
     HBasicBlock* s = sc.first;
     size_t s_count_in_block_successors = sc.second;
-    size_t block_count_in_s_predecessors = 0;
-    for (HBasicBlock* s_predecessor : s->GetPredecessors()) {
-      if (s_predecessor == block) {
-        ++block_count_in_s_predecessors;
-      }
-    }
+    size_t block_count_in_s_predecessors =
+        std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block);
     if (s_count_in_block_successors != block_count_in_s_predecessors) {
       AddError(StringPrintf(
           "Block %d lists %zu occurrences of block %d in its successors, whereas "
@@ -351,7 +357,7 @@
   // never exceptional successors.
   const size_t num_normal_successors = block->NumberOfNormalSuccessors();
   for (size_t j = 0; j < num_normal_successors; ++j) {
-    HBasicBlock* successor = block->GetSuccessor(j);
+    HBasicBlock* successor = block->GetSuccessors()[j];
     if (successor->IsCatchBlock()) {
       AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
                             successor->GetBlockId(),
@@ -359,7 +365,7 @@
     }
   }
   for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
-    HBasicBlock* successor = block->GetSuccessor(j);
+    HBasicBlock* successor = block->GetSuccessors()[j];
     if (!successor->IsCatchBlock()) {
       AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
                             successor->GetBlockId(),
@@ -373,7 +379,7 @@
   // not accounted for.
   if (block->NumberOfNormalSuccessors() > 1) {
     for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
-      HBasicBlock* successor = block->GetSuccessor(j);
+      HBasicBlock* successor = block->GetSuccessors()[j];
       if (successor->GetPredecessors().size() > 1) {
         AddError(StringPrintf("Critical edge between blocks %d and %d.",
                               block->GetBlockId(),
@@ -456,14 +462,14 @@
         id,
         num_preds));
   } else {
-    HBasicBlock* first_predecessor = loop_header->GetPredecessor(0);
+    HBasicBlock* first_predecessor = loop_header->GetPredecessors()[0];
     if (loop_information->IsBackEdge(*first_predecessor)) {
       AddError(StringPrintf(
           "First predecessor of loop header %d is a back edge.",
           id));
     }
     for (size_t i = 1, e = loop_header->GetPredecessors().size(); i < e; ++i) {
-      HBasicBlock* predecessor = loop_header->GetPredecessor(i);
+      HBasicBlock* predecessor = loop_header->GetPredecessors()[i];
       if (!loop_information->IsBackEdge(*predecessor)) {
         AddError(StringPrintf(
             "Loop header %d has multiple incoming (non back edge) blocks.",
@@ -493,7 +499,7 @@
 
   // Ensure all blocks in the loop are live and dominated by the loop header.
   for (uint32_t i : loop_blocks.Indexes()) {
-    HBasicBlock* loop_block = GetGraph()->GetBlock(i);
+    HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
     if (loop_block == nullptr) {
       AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
                             id,
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 7ddffc1..abf3659 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -26,12 +26,11 @@
 // A control-flow graph visitor performing various checks.
 class GraphChecker : public HGraphDelegateVisitor {
  public:
-  GraphChecker(ArenaAllocator* allocator, HGraph* graph,
-               const char* dump_prefix = "art::GraphChecker: ")
+  explicit GraphChecker(HGraph* graph, const char* dump_prefix = "art::GraphChecker: ")
     : HGraphDelegateVisitor(graph),
-      allocator_(allocator),
+      errors_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)),
       dump_prefix_(dump_prefix),
-      seen_ids_(allocator, graph->GetCurrentInstructionId(), false) {}
+      seen_ids_(graph->GetArena(), graph->GetCurrentInstructionId(), false) {}
 
   // Check the whole graph (in insertion order).
   virtual void Run() { VisitInsertionOrder(); }
@@ -65,7 +64,7 @@
   }
 
   // Get the list of detected errors.
-  const std::vector<std::string>& GetErrors() const {
+  const ArenaVector<std::string>& GetErrors() const {
     return errors_;
   }
 
@@ -82,11 +81,10 @@
     errors_.push_back(error);
   }
 
-  ArenaAllocator* const allocator_;
   // The block currently visited.
   HBasicBlock* current_block_ = nullptr;
   // Errors encountered while checking the graph.
-  std::vector<std::string> errors_;
+  ArenaVector<std::string> errors_;
 
  private:
   // String displayed before dumped errors.
@@ -102,9 +100,8 @@
  public:
   typedef GraphChecker super_type;
 
-  // TODO: There's no need to pass a separate allocator as we could get it from the graph.
-  SSAChecker(ArenaAllocator* allocator, HGraph* graph)
-    : GraphChecker(allocator, graph, "art::SSAChecker: ") {}
+  explicit SSAChecker(HGraph* graph)
+    : GraphChecker(graph, "art::SSAChecker: ") {}
 
   // Check the whole graph (in reverse post-order).
   void Run() OVERRIDE {
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 0f66775..fee56c7 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -50,7 +50,7 @@
   HGraph* graph = CreateCFG(&allocator, data);
   ASSERT_NE(graph, nullptr);
 
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 }
@@ -64,7 +64,7 @@
   graph->BuildDominatorTree();
   graph->TransformToSsa();
 
-  SSAChecker ssa_checker(&allocator, graph);
+  SSAChecker ssa_checker(graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
 }
@@ -112,7 +112,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = CreateSimpleCFG(&allocator);
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 
@@ -130,7 +130,7 @@
   ArenaAllocator allocator(&pool);
 
   HGraph* graph = CreateSimpleCFG(&allocator);
-  GraphChecker graph_checker(&allocator, graph);
+  GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
 
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 7968e88..d4b9b71 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -99,7 +99,7 @@
   ASSERT_NE(false_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(false_block->GetSuccessor(0), return_block);
+  ASSERT_EQ(false_block->GetSuccessors()[0], return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -134,7 +134,7 @@
   ASSERT_NE(true_block, return_block);
 
   // Ensure the new block branches to the join block.
-  ASSERT_EQ(true_block->GetSuccessor(0), return_block);
+  ASSERT_EQ(true_block->GetSuccessors()[0], return_block);
 }
 
 // Test that the successors of an if block stay consistent after a SimplifyCFG.
@@ -164,11 +164,11 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessor(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessor(1),
+  ASSERT_EQ(if_block->GetPredecessors()[1],
             if_block->GetLastInstruction()->AsIf()->IfTrueSuccessor());
 }
 
@@ -199,11 +199,11 @@
 
   // Ensure there is only one back edge.
   ASSERT_EQ(if_block->GetPredecessors().size(), 2u);
-  ASSERT_EQ(if_block->GetPredecessor(0), entry_block);
-  ASSERT_NE(if_block->GetPredecessor(1), if_block);
+  ASSERT_EQ(if_block->GetPredecessors()[0], entry_block);
+  ASSERT_NE(if_block->GetPredecessors()[1], if_block);
 
   // Ensure the new block is the back edge.
-  ASSERT_EQ(if_block->GetPredecessor(1),
+  ASSERT_EQ(if_block->GetPredecessors()[1],
             if_block->GetLastInstruction()->AsIf()->IfFalseSuccessor());
 }
 
@@ -242,7 +242,7 @@
 
   // Ensure the new block is the successor of the true block.
   ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors().size(), 1u);
-  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessor(0),
+  ASSERT_EQ(if_instr->IfTrueSuccessor()->GetSuccessors()[0],
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
@@ -280,7 +280,7 @@
 
   // Ensure the new block is the successor of the false block.
   ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors().size(), 1u);
-  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessor(0),
+  ASSERT_EQ(if_instr->IfFalseSuccessor()->GetSuccessors()[0],
             loop_block->GetLoopInformation()->GetPreHeader());
 }
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index d38f4c8..4111671 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -253,7 +253,7 @@
     AddIndent();
     output_ << "successors";
     for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
-      HBasicBlock* successor = block->GetSuccessor(i);
+      HBasicBlock* successor = block->GetSuccessors()[i];
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
     output_<< std::endl;
@@ -263,7 +263,7 @@
     AddIndent();
     output_ << "xhandlers";
     for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
-      HBasicBlock* handler = block->GetSuccessor(i);
+      HBasicBlock* handler = block->GetSuccessors()[i];
       output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
@@ -362,6 +362,8 @@
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     StartAttributeStream("gen_clinit_check") << std::boolalpha
         << load_class->MustGenerateClinitCheck() << std::noboolalpha;
+    StartAttributeStream("needs_access_check") << std::boolalpha
+        << load_class->NeedsAccessCheck() << std::noboolalpha;
   }
 
   void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 7cf0617..0a1758a 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -351,7 +351,7 @@
     HBasicBlock* dominator = block->GetDominator();
     ValueSet* dominator_set = sets_[dominator->GetBlockId()];
     if (dominator->GetSuccessors().size() == 1) {
-      DCHECK_EQ(dominator->GetSuccessor(0), block);
+      DCHECK_EQ(dominator->GetSuccessors()[0], block);
       set = dominator_set;
     } else {
       // We have to copy if the dominator has other successors, or `block` is not a successor
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 32f45b5..aa375f6 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -34,7 +34,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -47,14 +50,16 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                            Primitive::kPrimNot,
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   HInstruction* to_remove = block->GetLastInstruction();
   block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                            Primitive::kPrimNot,
@@ -62,7 +67,8 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   HInstruction* different_offset = block->GetLastInstruction();
   // Kill the value.
   block->AddInstruction(new (&allocator) HInstanceFieldSet(parameter,
@@ -72,14 +78,16 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                            Primitive::kPrimNot,
                                                            MemberOffset(42),
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   HInstruction* use_after_kill = block->GetLastInstruction();
   block->AddInstruction(new (&allocator) HExit());
 
@@ -106,7 +114,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -118,7 +129,8 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
 
   block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
   HBasicBlock* then = new (&allocator) HBasicBlock(graph);
@@ -139,7 +151,8 @@
                                                           false,
                                                           kUnknownFieldIndex,
                                                           graph->GetDexFile(),
-                                                          dex_cache));
+                                                          dex_cache,
+                                                          0));
   then->AddInstruction(new (&allocator) HGoto());
   else_->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                            Primitive::kPrimBoolean,
@@ -147,7 +160,8 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   else_->AddInstruction(new (&allocator) HGoto());
   join->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                           Primitive::kPrimBoolean,
@@ -155,7 +169,8 @@
                                                           false,
                                                           kUnknownFieldIndex,
                                                           graph->GetDexFile(),
-                                                          dex_cache));
+                                                          dex_cache,
+                                                          0));
   join->AddInstruction(new (&allocator) HExit());
 
   graph->TryBuildingSsa();
@@ -179,7 +194,10 @@
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
 
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (&allocator) HBasicBlock(graph);
@@ -191,7 +209,8 @@
                                                            false,
                                                            kUnknownFieldIndex,
                                                            graph->GetDexFile(),
-                                                           dex_cache));
+                                                           dex_cache,
+                                                           0));
   block->AddInstruction(new (&allocator) HGoto());
 
   HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph);
@@ -212,7 +231,8 @@
                                                                  false,
                                                                  kUnknownFieldIndex,
                                                                  graph->GetDexFile(),
-                                                                 dex_cache));
+                                                                 dex_cache,
+                                                                 0));
   HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction();
   loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction()));
 
@@ -225,7 +245,8 @@
                                                                false,
                                                                kUnknownFieldIndex,
                                                                graph->GetDexFile(),
-                                                               dex_cache));
+                                                               dex_cache,
+                                                               0));
   HInstruction* field_set = loop_body->GetLastInstruction();
   loop_body->AddInstruction(new (&allocator) HInstanceFieldGet(parameter,
                                                                Primitive::kPrimBoolean,
@@ -233,7 +254,8 @@
                                                                false,
                                                                kUnknownFieldIndex,
                                                                graph->GetDexFile(),
-                                                               dex_cache));
+                                                               dex_cache,
+                                                               0));
   HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction();
   loop_body->AddInstruction(new (&allocator) HGoto());
 
@@ -243,7 +265,8 @@
                                                           false,
                                                           kUnknownFieldIndex,
                                                           graph->GetDexFile(),
-                                                          dex_cache));
+                                                          dex_cache,
+                                                          0));
   HInstruction* field_get_in_exit = exit->GetLastInstruction();
   exit->AddInstruction(new (&allocator) HExit());
 
@@ -314,7 +337,10 @@
   inner_loop_body->AddSuccessor(inner_loop_header);
   inner_loop_exit->AddSuccessor(outer_loop_header);
 
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimBoolean);
+  HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(),
+                                                             0,
+                                                             0,
+                                                             Primitive::kPrimBoolean);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
   outer_loop_header->AddInstruction(new (&allocator) HIf(parameter));
@@ -339,7 +365,8 @@
                                                              false,
                                                              kUnknownFieldIndex,
                                                              graph->GetDexFile(),
-                                                             dex_cache));
+                                                             dex_cache,
+                                                             0));
 
     SideEffectsAnalysis side_effects(graph);
     side_effects.Run();
@@ -362,7 +389,8 @@
                                            false,
                                            kUnknownFieldIndex,
                                            graph->GetDexFile(),
-                                           dex_cache),
+                                           dex_cache,
+                                           0),
         outer_loop_body->GetLastInstruction());
 
     SideEffectsAnalysis side_effects(graph);
@@ -386,7 +414,8 @@
                                            false,
                                            kUnknownFieldIndex,
                                            graph->GetDexFile(),
-                                           dex_cache),
+                                           dex_cache,
+                                           0),
         inner_loop_body->GetLastInstruction());
 
     SideEffectsAnalysis side_effects(graph);
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index e5123de..cf0f349 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -47,7 +47,7 @@
   size_t phi_pos = -1;
   const size_t size = scc->size();
   for (size_t i = 0; i < size; i++) {
-    HInstruction* other = scc->at(i);
+    HInstruction* other = (*scc)[i];
     if (other->IsLoopHeaderPhi() && (phi == nullptr || phis.FoundBefore(other, phi))) {
       phi = other;
       phi_pos = i;
@@ -58,8 +58,7 @@
   if (phi != nullptr) {
     new_scc->clear();
     for (size_t i = 0; i < size; i++) {
-      DCHECK_LT(phi_pos, size);
-      new_scc->push_back(scc->at(phi_pos));
+      new_scc->push_back((*scc)[phi_pos]);
       if (++phi_pos >= size) phi_pos = 0;
     }
     DCHECK_EQ(size, new_scc->size());
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 20492e7..19af2fb 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -78,7 +78,8 @@
     graph_->SetExitBlock(exit_);
 
     // Provide entry and exit instructions.
-    parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot, true);
+    parameter_ = new (&allocator_) HParameterValue(
+        graph_->GetDexFile(), 0, 0, Primitive::kPrimNot, true);
     entry_->AddInstruction(parameter_);
     constant0_ = graph_->GetIntConstant(0);
     constant1_ = graph_->GetIntConstant(1);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0b65c56..f3b5f08 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -43,6 +43,11 @@
 static constexpr size_t kMaximumNumberOfHInstructions = 12;
 
 void HInliner::Run() {
+  const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
+  if ((compiler_options.GetInlineDepthLimit() == 0)
+      || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
+    return;
+  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 86a3ad9..839cf44 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,15 +16,16 @@
 
 #include "instruction_simplifier.h"
 
+#include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
 
-class InstructionSimplifierVisitor : public HGraphVisitor {
+class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
  public:
   InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph),
+      : HGraphDelegateVisitor(graph),
         stats_(stats) {}
 
   void Run();
@@ -71,9 +72,13 @@
   void VisitXor(HXor* instruction) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
   void VisitFakeString(HFakeString* fake_string) OVERRIDE;
+  void VisitInvoke(HInvoke* invoke) OVERRIDE;
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifySystemArrayCopy(HInvoke* invoke);
+  void SimplifyStringEquals(HInvoke* invoke);
+
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -240,6 +245,12 @@
 
 void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
   HInstruction* object = check_cast->InputAt(0);
+  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+  if (load_class->NeedsAccessCheck()) {
+    // If we need to perform an access check we cannot remove the instruction.
+    return;
+  }
+
   if (CanEnsureNotNullAt(object, check_cast)) {
     check_cast->ClearMustDoNullCheck();
   }
@@ -253,7 +264,6 @@
   }
 
   bool outcome;
-  HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome) {
       check_cast->GetBlock()->RemoveInstruction(check_cast);
@@ -275,6 +285,12 @@
 
 void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
   HInstruction* object = instruction->InputAt(0);
+  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
+  if (load_class->NeedsAccessCheck()) {
+    // If we need to perform an access check we cannot remove the instruction.
+    return;
+  }
+
   bool can_be_null = true;
   if (CanEnsureNotNullAt(object, instruction)) {
     can_be_null = false;
@@ -290,7 +306,6 @@
   }
 
   bool outcome;
-  HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
   if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
     if (outcome && can_be_null) {
       // Type test will succeed, we just need a null test.
@@ -1037,4 +1052,101 @@
   instruction->GetBlock()->RemoveInstruction(instruction);
 }
 
+void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
+  HInstruction* argument = instruction->InputAt(1);
+  HInstruction* receiver = instruction->InputAt(0);
+  if (receiver == argument) {  // Same instruction on both sides: fold to the constant 1.
+    // Because String.equals is an instance call, the receiver is
+    // a null check if we don't know it's non-null. The argument, however,
+    // will be the actual object. So we cannot end up in a situation where
+    // both are equal but could be null.
+    DCHECK(CanEnsureNotNullAt(argument, instruction));
+    instruction->ReplaceWith(GetGraph()->GetIntConstant(1));
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  } else {  // Keep the call; only record what is known about the argument.
+    StringEqualsOptimizations optimizations(instruction);
+    if (CanEnsureNotNullAt(argument, instruction)) {
+      optimizations.SetArgumentNotNull();
+    }
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo();
+    if (argument_rti.IsValid() && argument_rti.IsStringClass()) {
+      optimizations.SetArgumentIsString();
+    }
+  }
+}
+
+static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
+  if (potential_length->IsArrayLength()) {
+    return potential_length->InputAt(0) == potential_array;
+  }
+  // Otherwise accept a new-array whose input 0 (presumably its length) is potential_length.
+  if (potential_array->IsNewArray()) {
+    return potential_array->InputAt(0) == potential_length;
+  }
+  // Neither pattern matched, so the relationship cannot be proven here.
+  return false;
+}
+
+void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
+  HInstruction* source = instruction->InputAt(0);  // (src, src_pos, dest, dest_pos, length)
+  HInstruction* destination = instruction->InputAt(2);
+  HInstruction* count = instruction->InputAt(4);
+  SystemArrayCopyOptimizations optimizations(instruction);
+  if (CanEnsureNotNullAt(source, instruction)) {
+    optimizations.SetSourceIsNotNull();
+  }
+  if (CanEnsureNotNullAt(destination, instruction)) {
+    optimizations.SetDestinationIsNotNull();
+  }
+  if (destination == source) {
+    optimizations.SetDestinationIsSource();
+  }
+  // Record whether the count is recognizably the length of either array.
+  if (IsArrayLengthOf(count, source)) {
+    optimizations.SetCountIsSourceLength();
+  }
+
+  if (IsArrayLengthOf(count, destination)) {
+    optimizations.SetCountIsDestinationLength();
+  }
+  // Type-based flags; the reference type info is read while holding ScopedObjectAccess.
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
+    if (destination_rti.IsValid()) {
+      if (destination_rti.IsObjectArray()) {
+        if (destination_rti.IsExact()) {
+          optimizations.SetDoesNotNeedTypeCheck();
+        }
+        optimizations.SetDestinationIsTypedObjectArray();
+      }
+      if (destination_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsPrimitiveArray();
+      } else if (destination_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetDestinationIsNonPrimitiveArray();
+      }
+    }
+    ReferenceTypeInfo source_rti = source->GetReferenceTypeInfo();
+    if (source_rti.IsValid()) {
+      if (destination_rti.IsValid() && destination_rti.CanArrayHoldValuesOf(source_rti)) {
+        optimizations.SetDoesNotNeedTypeCheck();
+      }
+      if (source_rti.IsPrimitiveArrayClass()) {
+        optimizations.SetSourceIsPrimitiveArray();
+      } else if (source_rti.IsNonPrimitiveArrayClass()) {
+        optimizations.SetSourceIsNonPrimitiveArray();
+      }
+    }
+  }
+}
+
+void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {  // Intrinsic dispatch.
+  if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) {
+    SimplifyStringEquals(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
+    SimplifySystemArrayCopy(instruction);
+  }  // Any other invoke is left untouched.
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 9564622..dbe7524 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -210,6 +210,9 @@
     case kIntrinsicSystemArrayCopyCharArray:
       return Intrinsics::kSystemArrayCopyChar;
 
+    case kIntrinsicSystemArrayCopy:
+      return Intrinsics::kSystemArrayCopy;
+
     // Thread.currentThread.
     case kIntrinsicCurrentThread:
       return  Intrinsics::kThreadCurrentThread;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index d1a17b6..e459516 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -116,6 +116,80 @@
   DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
 };
 
+#define GENERIC_OPTIMIZATION(name, bit)                \
+public:                                                \
+void Set##name() { SetBit(k##name); }                  \
+bool Get##name() const { return IsBitSet(k##name); }   \
+private:                                               \
+static constexpr int k##name = bit
+
+class IntrinsicOptimizations : public ValueObject {  // Bit-flag view over an HInvoke.
+ public:
+  explicit IntrinsicOptimizations(HInvoke* invoke) : value_(invoke->GetIntrinsicOptimizations()) {}
+  explicit IntrinsicOptimizations(const HInvoke& invoke)
+      : value_(invoke.GetIntrinsicOptimizations()) {}
+  // Flags common to all intrinsics; subclasses number their own bits after these two.
+  static constexpr int kNumberOfGenericOptimizations = 2;
+  GENERIC_OPTIMIZATION(DoesNotNeedDexCache, 0);
+  GENERIC_OPTIMIZATION(DoesNotNeedEnvironment, 1);
+
+ protected:
+  bool IsBitSet(uint32_t bit) const {
+    return (*value_ & (1u << bit)) != 0u;  // Unsigned mask: no signed shift into bit 31.
+  }
+  // value_ is pointer-to-const so a const HInvoke& can be wrapped; SetBit casts that away.
+  void SetBit(uint32_t bit) {
+    *(const_cast<uint32_t*>(value_)) |= (1u << bit);
+  }
+
+ private:
+  const uint32_t* value_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations);
+};
+
+#undef GENERIC_OPTIMIZATION
+
+#define INTRINSIC_OPTIMIZATION(name, bit)                             \
+public:                                                               \
+void Set##name() { SetBit(k##name); }                                 \
+bool Get##name() const { return IsBitSet(k##name); }                  \
+private:                                                              \
+static constexpr int k##name = bit + kNumberOfGenericOptimizations
+
+class StringEqualsOptimizations : public IntrinsicOptimizations {
+ public:
+  explicit StringEqualsOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
+  // Set by the instruction simplifier; lets the x86 code generator skip the matching check.
+  INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0);
+  INTRINSIC_OPTIMIZATION(ArgumentIsString, 1);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations);
+};
+
+class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
+ public:
+  explicit SystemArrayCopyOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
+  // Facts about the arraycopy operands; bit indices sit above the generic flags.
+  INTRINSIC_OPTIMIZATION(SourceIsNotNull, 0);
+  INTRINSIC_OPTIMIZATION(DestinationIsNotNull, 1);
+  INTRINSIC_OPTIMIZATION(DestinationIsSource, 2);
+  INTRINSIC_OPTIMIZATION(CountIsSourceLength, 3);
+  INTRINSIC_OPTIMIZATION(CountIsDestinationLength, 4);
+  INTRINSIC_OPTIMIZATION(DoesNotNeedTypeCheck, 5);
+  INTRINSIC_OPTIMIZATION(DestinationIsTypedObjectArray, 6);
+  INTRINSIC_OPTIMIZATION(DestinationIsNonPrimitiveArray, 7);
+  INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
+  INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
+  INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
+};
+
+#undef INTRINSIC_OPTIMIZATION
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 938c78e..2793793 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1333,6 +1333,7 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b0cfd0d..4da94ee 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1447,6 +1447,7 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index bfe5e55..8f1d5e1 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -58,6 +58,7 @@
   V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \
   V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \
   V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \
+  V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \
   V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \
   V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b60905d..764a114 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -812,6 +812,7 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 318d3a6..e83aebb 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1054,17 +1054,22 @@
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  // Check if input is null, return false if it is.
-  __ testl(arg, arg);
-  __ j(kEqual, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ testl(arg, arg);
+    __ j(kEqual, &return_false);
+  }
 
   // Instanceof check for the argument by comparing class fields.
   // All string objects must have the same type since String cannot be subclassed.
   // Receiver must be a string object, so its class field is equal to all strings' class fields.
   // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ movl(ecx, Address(str, class_offset));
-  __ cmpl(ecx, Address(arg, class_offset));
-  __ j(kNotEqual, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    __ movl(ecx, Address(str, class_offset));
+    __ cmpl(ecx, Address(arg, class_offset));
+    __ j(kNotEqual, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ cmpl(str, arg);
@@ -2250,6 +2255,7 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1a13b69..1061aae 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -620,7 +620,6 @@
   codegen_->Load64BitValue(out, kPrimIntMax);
 
   // if inPlusPointFive >= maxInt goto done
-  __ movl(out, Immediate(kPrimIntMax));
   __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
   __ j(kAboveEqual, &done);
 
@@ -668,7 +667,6 @@
   codegen_->Load64BitValue(out, kPrimLongMax);
 
   // if inPlusPointFive >= maxLong goto done
-  __ movq(out, Immediate(kPrimLongMax));
   __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
   __ j(kAboveEqual, &done);
 
@@ -754,7 +752,7 @@
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCallOnSlowPath,
                                                             kIntrinsified);
-  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
   locations->SetInAt(2, Location::RequiresRegister());
@@ -770,19 +768,27 @@
 static void CheckPosition(X86_64Assembler* assembler,
                           Location pos,
                           CpuRegister input,
-                          CpuRegister length,
+                          Location length,
                           SlowPathCode* slow_path,
                           CpuRegister input_len,
-                          CpuRegister temp) {
-  // Where is the length in the String?
+                          CpuRegister temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<CpuRegister>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
       __ movl(input_len, Address(input, length_offset));
@@ -791,9 +797,18 @@
 
       // Check that (length(input) - pos) >= length.
       __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<CpuRegister>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
@@ -807,7 +822,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<CpuRegister>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
@@ -817,9 +836,9 @@
   LocationSummary* locations = invoke->GetLocations();
 
   CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
-  Location srcPos = locations->InAt(1);
+  Location src_pos = locations->InAt(1);
   CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
-  Location destPos = locations->InAt(3);
+  Location dest_pos = locations->InAt(3);
   Location length = locations->InAt(4);
 
   // Temporaries that we need for MOVSW.
@@ -852,6 +871,12 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
+  // Validity checks: source.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+
+  // Validity checks: dest.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+
   // We need the count in RCX.
   if (length.IsConstant()) {
     __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
@@ -859,12 +884,6 @@
     __ movl(count, length.AsRegister<CpuRegister>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
-
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
-
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
@@ -872,18 +891,18 @@
 
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
 
-  if (srcPos.IsConstant()) {
-    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, char_size * src_pos_const + data_offset));
   } else {
-    __ leal(src_base, Address(src, srcPos.AsRegister<CpuRegister>(),
+    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(),
                               ScaleFactor::TIMES_2, data_offset));
   }
-  if (destPos.IsConstant()) {
-    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
+  if (dest_pos.IsConstant()) {
+    int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(dest_base, Address(dest, char_size * dest_pos_const + data_offset));
   } else {
-    __ leal(dest_base, Address(dest, destPos.AsRegister<CpuRegister>(),
+    __ leal(dest_base, Address(dest, dest_pos.AsRegister<CpuRegister>(),
                                ScaleFactor::TIMES_2, data_offset));
   }
 
@@ -893,6 +912,277 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+
+void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  // Check to see if we have known failures that will cause us to have to bail out
+  // to the runtime, and just generate the runtime call directly.
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+
+  // Constant positions must be non-negative; non-constant ones are not checked here.
+  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
+      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
+    // A negative constant position always fails: return without creating locations.
+    return;
+  }
+
+  // A constant length must be >= 0 (zero is accepted; only negative values bail out).
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+  if (length != nullptr) {
+    int32_t len = length->GetValue();
+    if (len < 0) {
+      // Negative constant length: return without creating locations.
+      return;
+    }
+  }
+
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (optimizations.GetDestinationIsSource()) {
+    if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
+      // Same array with constant src_pos < dest_pos: this case is not intrinsified.
+      return;
+    }
+  }
+
+  if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
+    // We currently don't intrinsify primitive copying.
+    return;
+  }
+
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
+  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
+
+  locations->AddTemp(Location::RequiresRegister());  // Three register temps in total.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
+  X86_64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
+  Location src_pos = locations->InAt(1);
+  CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+  CpuRegister temp3 = locations->GetTemp(2).AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  NearLabel ok;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  if (!optimizations.GetDestinationIsSource()) {
+    __ cmpl(src, dest);  // Flags are consumed by the j(kNotEqual, &ok) branches below.
+  }
+
+  // If source and destination are the same array, we go to the slow path when
+  // src_pos < dest_pos; the inline loop below copies from the lowest index upwards.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ j(kNotEqual, &ok);
+      }
+      __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
+      __ j(kGreater, slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ j(kNotEqual, &ok);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), Immediate(dest_pos_constant));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<CpuRegister>(), dest_pos.AsRegister<CpuRegister>());
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&ok);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                temp2,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If neither check succeeds, we go to the
+    // slow path.
+    __ movl(temp1, Address(dest, class_offset));
+    __ movl(temp2, Address(src, class_offset));
+    bool did_unpoison = false;
+    if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+        !optimizations.GetSourceIsNonPrimitiveArray()) {
+      // One or both of the references need to be unpoisoned. Unpoison them
+      // both to make the identity check valid.
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ MaybeUnpoisonHeapReference(temp2);
+      did_unpoison = true;
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // Bail out if the destination is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // Bail out if the source is not a non-primitive array.
+      __ movl(CpuRegister(TMP), Address(temp2, component_offset));
+      __ testl(CpuRegister(TMP), CpuRegister(TMP));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+      __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    __ cmpl(temp1, temp2);
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      NearLabel do_copy;
+      __ j(kEqual, &do_copy);
+      if (!did_unpoison) {
+        __ MaybeUnpoisonHeapReference(temp1);
+      }
+      __ movl(temp1, Address(temp1, component_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ movl(temp1, Address(temp1, super_offset));
+      // No need to unpoison the result, we're comparing against null.
+      __ testl(temp1, temp1);
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non-primitive array.
+    __ movl(temp1, Address(src, class_offset));
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ movl(CpuRegister(TMP), Address(temp1, component_offset));
+    __ testl(CpuRegister(TMP), CpuRegister(TMP));
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
+    __ cmpw(Address(CpuRegister(TMP), primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+
+  uint32_t element_size = sizeof(int32_t);  // Each element is moved with a 32-bit movl below.
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp2, Address(dest, element_size * constant + offset));
+  } else {
+    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp3, Address(temp1, element_size * constant));
+  } else {
+    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need to
+  // poison/unpoison, nor do any read barrier as the next uses of the destination
+  // array will do it.
+  NearLabel loop, done;
+  __ cmpl(temp1, temp3);
+  __ j(kEqual, &done);
+  __ Bind(&loop);
+  __ movl(CpuRegister(TMP), Address(temp1, 0));
+  __ movl(Address(temp2, 0), CpuRegister(TMP));
+  __ addl(temp1, Immediate(element_size));
+  __ addl(temp2, Immediate(element_size));
+  __ cmpl(temp1, temp3);
+  __ j(kNotEqual, &loop);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       CpuRegister(kNoRegister),
+                       false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index ec4a9ec..a036bd5 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -61,7 +61,7 @@
     loop_body_->AddSuccessor(loop_header_);
 
     // Provide boiler-plate instructions.
-    parameter_ = new (&allocator_) HParameterValue(0, Primitive::kPrimNot);
+    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
     entry_->AddInstruction(parameter_);
     constant_ = graph_->GetIntConstant(42);
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
@@ -106,11 +106,11 @@
   NullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(
       parameter_, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache);
+      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
   HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
       parameter_, constant_, Primitive::kPrimInt, MemberOffset(20),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache);
+      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_field->GetBlock(), loop_body_);
@@ -127,11 +127,11 @@
   NullHandle<mirror::DexCache> dex_cache;
   HInstruction* get_field = new (&allocator_) HInstanceFieldGet(
       parameter_, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache);
+      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction());
   HInstruction* set_field = new (&allocator_) HInstanceFieldSet(
       parameter_, get_field, Primitive::kPrimLong, MemberOffset(10),
-      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache);
+      false, kUnknownFieldIndex, graph_->GetDexFile(), dex_cache, 0);
   loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction());
 
   EXPECT_EQ(get_field->GetBlock(), loop_body_);
@@ -146,7 +146,7 @@
 
   // Populate the loop with instructions: set/get array with different types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong);
+      parameter_, constant_, Primitive::kPrimLong, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
       parameter_, constant_, constant_, Primitive::kPrimInt, 0);
@@ -164,7 +164,7 @@
 
   // Populate the loop with instructions: set/get array with same types.
   HInstruction* get_array = new (&allocator_) HArrayGet(
-      parameter_, constant_, Primitive::kPrimLong);
+      parameter_, constant_, Primitive::kPrimLong, 0);
   loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction());
   HInstruction* set_array = new (&allocator_) HArraySet(
       parameter_, get_array, constant_, Primitive::kPrimLong, 0);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index b9ab290..7f67560 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -77,7 +77,7 @@
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
   ASSERT_EQ(8u, range->GetEnd());
-  HBasicBlock* block = graph->GetBlock(1);
+  HBasicBlock* block = graph->GetBlocks()[1];
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(8u, block->GetLastInstruction()->GetLifetimePosition());
   ASSERT_TRUE(range->GetNext() == nullptr);
@@ -125,7 +125,7 @@
   ASSERT_EQ(2u, range->GetStart());
   // Last use is the return instruction.
   ASSERT_EQ(22u, range->GetEnd());
-  HBasicBlock* block = graph->GetBlock(3);
+  HBasicBlock* block = graph->GetBlocks()[3];
   ASSERT_TRUE(block->GetLastInstruction()->IsReturn());
   ASSERT_EQ(22u, block->GetLastInstruction()->GetLifetimePosition());
   ASSERT_TRUE(range->GetNext() == nullptr);
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index de4fb7e..d014379 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -481,12 +481,10 @@
                   bool intrinsified = false);
 
   void SetInAt(uint32_t at, Location location) {
-    DCHECK_LT(at, GetInputCount());
     inputs_[at] = location;
   }
 
   Location InAt(uint32_t at) const {
-    DCHECK_LT(at, GetInputCount());
     return inputs_[at];
   }
 
@@ -514,12 +512,10 @@
   }
 
   Location GetTemp(uint32_t at) const {
-    DCHECK_LT(at, GetTempCount());
     return temps_[at];
   }
 
   void SetTempAt(uint32_t at, Location location) {
-    DCHECK_LT(at, GetTempCount());
     DCHECK(temps_[at].IsUnallocated() || temps_[at].IsInvalid());
     temps_[at] = location;
   }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 989970f..24a89bc 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -21,6 +21,7 @@
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
 #include "base/stl_util.h"
+#include "intrinsics.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -54,7 +55,6 @@
       visiting.ClearBit(current_id);
       worklist.pop_back();
     } else {
-      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
       uint32_t successor_id = successor->GetBlockId();
       if (visiting.IsBitSet(successor_id)) {
@@ -88,7 +88,7 @@
 void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
-      HBasicBlock* block = GetBlock(i);
+      HBasicBlock* block = blocks_[i];
       DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
@@ -100,7 +100,7 @@
 void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
-      HBasicBlock* block = GetBlock(i);
+      HBasicBlock* block = blocks_[i];
       // We only need to update the successor, which might be live.
       for (HBasicBlock* successor : block->GetSuccessors()) {
         successor->RemovePredecessor(block);
@@ -174,7 +174,6 @@
     if (successors_visited[current_id] == current->GetSuccessors().size()) {
       worklist.pop_back();
     } else {
-      DCHECK_LT(successors_visited[current_id], current->GetSuccessors().size());
       HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
 
       if (successor->GetDominator() == nullptr) {
@@ -185,7 +184,6 @@
 
       // Once all the forward edges have been visited, we know the immediate
       // dominator of the block. We can then start visiting its successors.
-      DCHECK_LT(successor->GetBlockId(), visits.size());
       if (++visits[successor->GetBlockId()] ==
           successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
         successor->GetDominator()->AddDominatedBlock(successor);
@@ -257,7 +255,7 @@
     pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc()));
 
     for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessor(pred);
+      HBasicBlock* predecessor = header->GetPredecessors()[pred];
       if (!info->IsBackEdge(*predecessor)) {
         predecessor->ReplaceSuccessor(header, pre_header);
         pred--;
@@ -267,10 +265,10 @@
   }
 
   // Make sure the first predecessor of a loop header is the incoming block.
-  if (info->IsBackEdge(*header->GetPredecessor(0))) {
-    HBasicBlock* to_swap = header->GetPredecessor(0);
+  if (info->IsBackEdge(*header->GetPredecessors()[0])) {
+    HBasicBlock* to_swap = header->GetPredecessors()[0];
     for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
-      HBasicBlock* predecessor = header->GetPredecessor(pred);
+      HBasicBlock* predecessor = header->GetPredecessors()[pred];
       if (!info->IsBackEdge(*predecessor)) {
         header->predecessors_[pred] = to_swap;
         header->predecessors_[0] = predecessor;
@@ -293,7 +291,7 @@
 }
 
 static bool CheckIfPredecessorAtIsExceptional(const HBasicBlock& block, size_t pred_idx) {
-  HBasicBlock* predecessor = block.GetPredecessor(pred_idx);
+  HBasicBlock* predecessor = block.GetPredecessors()[pred_idx];
   if (!predecessor->EndsWithTryBoundary()) {
     // Only edges from HTryBoundary can be exceptional.
     return false;
@@ -343,7 +341,7 @@
       HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
       for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
         if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessor(j)->ReplaceSuccessor(catch_block, normal_block);
+          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
           --j;
         }
       }
@@ -365,7 +363,7 @@
     // Infer try membership from the first predecessor. Having simplified loops,
     // the first predecessor can never be a back edge and therefore it must have
     // been visited already and had its try membership set.
-    HBasicBlock* first_predecessor = block->GetPredecessor(0);
+    HBasicBlock* first_predecessor = block->GetPredecessors()[0];
     DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
     const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
     if (try_entry != nullptr) {
@@ -385,7 +383,7 @@
     if (block == nullptr) continue;
     if (block->NumberOfNormalSuccessors() > 1) {
       for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
-        HBasicBlock* successor = block->GetSuccessor(j);
+        HBasicBlock* successor = block->GetSuccessors()[j];
         DCHECK(!successor->IsCatchBlock());
         if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
@@ -534,7 +532,7 @@
 void HLoopInformation::Update() {
   HGraph* graph = header_->GetGraph();
   for (uint32_t id : blocks_.Indexes()) {
-    HBasicBlock* block = graph->GetBlock(id);
+    HBasicBlock* block = graph->GetBlocks()[id];
     // Reset loop information of non-header blocks inside the loop, except
     // members of inner nested loops because those should already have been
     // updated by their own LoopInformation.
@@ -743,7 +741,6 @@
 }
 
 void HEnvironment::RemoveAsUserOfInput(size_t index) const {
-  DCHECK_LT(index, Size());
   const HUserRecord<HEnvironment*>& user_record = vregs_[index];
   user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
 }
@@ -1435,7 +1432,7 @@
   // Update links to the successors of `other`.
   successors_.clear();
   while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessor(0);
+    HBasicBlock* successor = other->GetSuccessors()[0];
     successor->ReplacePredecessor(other, this);
   }
 
@@ -1472,7 +1469,7 @@
   // Update links to the successors of `other`.
   successors_.clear();
   while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessor(0);
+    HBasicBlock* successor = other->GetSuccessors()[0];
     successor->ReplacePredecessor(other, this);
   }
 
@@ -1488,11 +1485,11 @@
 
 void HBasicBlock::ReplaceWith(HBasicBlock* other) {
   while (!GetPredecessors().empty()) {
-    HBasicBlock* predecessor = GetPredecessor(0);
+    HBasicBlock* predecessor = GetPredecessors()[0];
     predecessor->ReplaceSuccessor(this, other);
   }
   while (!GetSuccessors().empty()) {
-    HBasicBlock* successor = GetSuccessor(0);
+    HBasicBlock* successor = GetSuccessors()[0];
     successor->ReplacePredecessor(this, other);
   }
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
@@ -1567,9 +1564,9 @@
   if (GetBlocks().size() == 3) {
     // Simple case of an entry block, a body block, and an exit block.
     // Put the body block's instruction into `invoke`'s block.
-    HBasicBlock* body = GetBlock(1);
-    DCHECK(GetBlock(0)->IsEntryBlock());
-    DCHECK(GetBlock(2)->IsExitBlock());
+    HBasicBlock* body = GetBlocks()[1];
+    DCHECK(GetBlocks()[0]->IsEntryBlock());
+    DCHECK(GetBlocks()[2]->IsExitBlock());
     DCHECK(!body->IsExitBlock());
     HInstruction* last = body->GetLastInstruction();
 
@@ -1594,16 +1591,16 @@
     HBasicBlock* at = invoke->GetBlock();
     HBasicBlock* to = at->SplitAfter(invoke);
 
-    HBasicBlock* first = entry_block_->GetSuccessor(0);
+    HBasicBlock* first = entry_block_->GetSuccessors()[0];
     DCHECK(!first->IsInLoop());
     at->MergeWithInlined(first);
     exit_block_->ReplaceWith(to);
 
     // Update all predecessors of the exit block (now the `to` block)
     // to not `HReturn` but `HGoto` instead.
-    bool returns_void = to->GetPredecessor(0)->GetLastInstruction()->IsReturnVoid();
+    bool returns_void = to->GetPredecessors()[0]->GetLastInstruction()->IsReturnVoid();
     if (to->GetPredecessors().size() == 1) {
-      HBasicBlock* predecessor = to->GetPredecessor(0);
+      HBasicBlock* predecessor = to->GetPredecessors()[0];
       HInstruction* last = predecessor->GetLastInstruction();
       if (!returns_void) {
         return_value = last->InputAt(0);
@@ -1873,6 +1870,35 @@
   return false;
 }
 
+void HInvoke::SetIntrinsic(Intrinsics intrinsic,
+                           IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
+  intrinsic_ = intrinsic;  // Record the intrinsic this invoke has been recognized as.
+  IntrinsicOptimizations opt(this);
+  if (needs_env_or_cache == kNoEnvironmentOrCache) {  // Other values leave the flags untouched.
+    opt.SetDoesNotNeedDexCache();
+    opt.SetDoesNotNeedEnvironment();
+  }
+}
+
+bool HInvoke::NeedsEnvironment() const {
+  if (!IsIntrinsic()) {
+    return true;  // Non-intrinsic invokes always report that they need an environment.
+  }
+  IntrinsicOptimizations opt(*this);
+  return !opt.GetDoesNotNeedEnvironment();  // Intrinsics need one unless flagged otherwise.
+}
+
+bool HInvokeStaticOrDirect::NeedsDexCache() const {
+  if (IsRecursive() || IsStringInit()) {
+    return false;  // Recursive and string-init invokes never report a dex cache need.
+  }
+  if (!IsIntrinsic()) {
+    return true;  // Any other non-intrinsic invoke reports that it needs the dex cache.
+  }
+  IntrinsicOptimizations opt(*this);
+  return !opt.GetDoesNotNeedDexCache();  // Intrinsics need it unless flagged otherwise.
+}
+
 void HInstruction::RemoveEnvironmentUsers() {
   for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
     HUseListNode<HEnvironment*>* user_node = use_it.Current();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 489f71d..224c635 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -177,11 +177,6 @@
   ArenaAllocator* GetArena() const { return arena_; }
   const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
 
-  HBasicBlock* GetBlock(size_t id) const {
-    DCHECK_LT(id, blocks_.size());
-    return blocks_[id];
-  }
-
   bool IsInSsaForm() const { return in_ssa_form_; }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
@@ -648,20 +643,10 @@
     return predecessors_;
   }
 
-  HBasicBlock* GetPredecessor(size_t pred_idx) const {
-    DCHECK_LT(pred_idx, predecessors_.size());
-    return predecessors_[pred_idx];
-  }
-
   const ArenaVector<HBasicBlock*>& GetSuccessors() const {
     return successors_;
   }
 
-  HBasicBlock* GetSuccessor(size_t succ_idx) const {
-    DCHECK_LT(succ_idx, successors_.size());
-    return successors_[succ_idx];
-  }
-
   bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) {
     return ContainsElement(successors_, block, start_from);
   }
@@ -797,18 +782,18 @@
 
   HBasicBlock* GetSinglePredecessor() const {
     DCHECK_EQ(GetPredecessors().size(), 1u);
-    return GetPredecessor(0);
+    return GetPredecessors()[0];
   }
 
   HBasicBlock* GetSingleSuccessor() const {
     DCHECK_EQ(GetSuccessors().size(), 1u);
-    return GetSuccessor(0);
+    return GetSuccessors()[0];
   }
 
   // Returns whether the first occurrence of `predecessor` in the list of
   // predecessors is at index `idx`.
   bool IsFirstIndexOfPredecessor(HBasicBlock* predecessor, size_t idx) const {
-    DCHECK_EQ(GetPredecessor(idx), predecessor);
+    DCHECK_EQ(GetPredecessors()[idx], predecessor);
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
@@ -886,7 +871,7 @@
 
   bool IsLoopPreHeaderFirstPredecessor() const {
     DCHECK(IsLoopHeader());
-    return GetPredecessor(0) == GetLoopInformation()->GetPreHeader();
+    return GetPredecessors()[0] == GetLoopInformation()->GetPreHeader();
   }
 
   HLoopInformation* GetLoopInformation() const {
@@ -1559,12 +1544,10 @@
   void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
 
   void SetRawEnvAt(size_t index, HInstruction* instruction) {
-    DCHECK_LT(index, Size());
     vregs_[index] = HUserRecord<HEnvironment*>(instruction);
   }
 
   HInstruction* GetInstructionAt(size_t index) const {
-    DCHECK_LT(index, Size());
     return vregs_[index].GetInstruction();
   }
 
@@ -1575,12 +1558,10 @@
   HEnvironment* GetParent() const { return parent_; }
 
   void SetLocationAt(size_t index, Location location) {
-    DCHECK_LT(index, Size());
     locations_[index] = location;
   }
 
   Location GetLocationAt(size_t index) const {
-    DCHECK_LT(index, Size());
     return locations_[index];
   }
 
@@ -1610,7 +1591,6 @@
   void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
     DCHECK(env_use->GetUser() == this);
     size_t index = env_use->GetIndex();
-    DCHECK_LT(index, Size());
     vregs_[index] = HUserRecord<HEnvironment*>(vregs_[index], env_use);
   }
 
@@ -1656,6 +1636,11 @@
     return GetTypeHandle()->IsObjectClass();
   }
 
+  bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());  // Must only be queried on a valid type info.
+    return GetTypeHandle()->IsStringClass();  // Forwards to the class behind the type handle.
+  }
+
   bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) {
     DCHECK(IsValid());
     return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass();
@@ -1667,15 +1652,36 @@
   }
 
   bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     return GetTypeHandle()->IsArrayClass();
   }
 
+  bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());  // Must only be queried on a valid type info.
+    return GetTypeHandle()->IsPrimitiveArray();  // Forwards to the class behind the type handle.
+  }
+
+  bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());  // Must only be queried on a valid type info.
+    return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();  // Both hold.
+  }
+
   bool CanArrayHold(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
     if (!IsExact()) return false;
     if (!IsArrayClass()) return false;
     return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get());
   }
 
+  bool CanArrayHoldValuesOf(ReferenceTypeInfo rti)  const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());  // Must only be queried on a valid type info.
+    if (!IsExact()) return false;  // Answers false unless this type is exact.
+    if (!IsArrayClass()) return false;  // This type must be an array class.
+    if (!rti.IsArrayClass()) return false;  // `rti` must be an array class as well.
+    return GetTypeHandle()->GetComponentType()->IsAssignableFrom(
+        rti.GetTypeHandle()->GetComponentType());  // Compares the two component types.
+  }
+
   Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
 
   bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1715,7 +1721,7 @@
 
 class HInstruction : public ArenaObject<kArenaAllocInstruction> {
  public:
-  HInstruction(SideEffects side_effects, uint32_t dex_pc = kNoDexPc)
+  HInstruction(SideEffects side_effects, uint32_t dex_pc)
       : previous_(nullptr),
         next_(nullptr),
         block_(nullptr),
@@ -2072,7 +2078,7 @@
 template<size_t N>
 class HTemplateInstruction: public HInstruction {
  public:
-  HTemplateInstruction<N>(SideEffects side_effects, uint32_t dex_pc = kNoDexPc)
+  HTemplateInstruction<N>(SideEffects side_effects, uint32_t dex_pc)
       : HInstruction(side_effects, dex_pc), inputs_() {}
   virtual ~HTemplateInstruction() {}
 
@@ -2099,7 +2105,7 @@
 template<>
 class HTemplateInstruction<0>: public HInstruction {
  public:
-  explicit HTemplateInstruction<0>(SideEffects side_effects, uint32_t dex_pc = kNoDexPc)
+  explicit HTemplateInstruction<0>(SideEffects side_effects, uint32_t dex_pc)
       : HInstruction(side_effects, dex_pc) {}
 
   virtual ~HTemplateInstruction() {}
@@ -2125,7 +2131,7 @@
 template<intptr_t N>
 class HExpression : public HTemplateInstruction<N> {
  public:
-  HExpression<N>(Primitive::Type type, SideEffects side_effects, uint32_t dex_pc = kNoDexPc)
+  HExpression<N>(Primitive::Type type, SideEffects side_effects, uint32_t dex_pc)
       : HTemplateInstruction<N>(side_effects, dex_pc), type_(type) {}
   virtual ~HExpression() {}
 
@@ -2315,11 +2321,11 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   HBasicBlock* IfTrueSuccessor() const {
-    return GetBlock()->GetSuccessor(0);
+    return GetBlock()->GetSuccessors()[0];
   }
 
   HBasicBlock* IfFalseSuccessor() const {
-    return GetBlock()->GetSuccessor(1);
+    return GetBlock()->GetSuccessors()[1];
   }
 
   DECLARE_INSTRUCTION(If);
@@ -2347,7 +2353,7 @@
   bool IsControlFlow() const OVERRIDE { return true; }
 
   // Returns the block's non-exceptional successor (index zero).
-  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessor(0); }
+  HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
 
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
@@ -2384,7 +2390,7 @@
     : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
 
   bool Done() const { return index_ == block_.GetSuccessors().size(); }
-  HBasicBlock* Current() const { return block_.GetSuccessor(index_); }
+  HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; }
   size_t CurrentSuccessorIndex() const { return index_; }
   void Advance() { ++index_; }
 
@@ -2449,7 +2455,7 @@
 
   HBasicBlock* GetDefaultBlock() const {
     // Last entry is the default block.
-    return GetBlock()->GetSuccessor(num_entries_);
+    return GetBlock()->GetSuccessors()[num_entries_];
   }
   DECLARE_INSTRUCTION(PackedSwitch);
 
@@ -3034,11 +3040,7 @@
  public:
   size_t InputCount() const OVERRIDE { return inputs_.size(); }
 
-  // Runtime needs to walk the stack, so Dex -> Dex calls need to
-  // know their environment.
-  bool NeedsEnvironment() const OVERRIDE {
-    return needs_environment_or_cache_ == kNeedsEnvironmentOrCache;
-  }
+  bool NeedsEnvironment() const OVERRIDE;
 
   void SetArgumentAt(size_t index, HInstruction* argument) {
     SetRawInputAt(index, argument);
@@ -3062,10 +3064,7 @@
     return intrinsic_;
   }
 
-  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) {
-    intrinsic_ = intrinsic;
-    needs_environment_or_cache_ = needs_env_or_cache;
-  }
+  void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
 
   bool IsFromInlinedInvoke() const {
     return GetEnvironment()->GetParent() != nullptr;
@@ -3073,6 +3072,16 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  uint32_t* GetIntrinsicOptimizations() {
+    return &intrinsic_optimizations_;
+  }
+
+  const uint32_t* GetIntrinsicOptimizations() const {
+    return &intrinsic_optimizations_;
+  }
+
+  bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
+
   DECLARE_INSTRUCTION(Invoke);
 
  protected:
@@ -3092,16 +3101,14 @@
       dex_method_index_(dex_method_index),
       original_invoke_type_(original_invoke_type),
       intrinsic_(Intrinsics::kNone),
-      needs_environment_or_cache_(kNeedsEnvironmentOrCache) {
+      intrinsic_optimizations_(0) {
   }
 
   const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    DCHECK_LT(index, InputCount());
     return inputs_[index];
   }
 
   void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LT(index, InputCount());
     inputs_[index] = input;
   }
 
@@ -3111,7 +3118,9 @@
   const uint32_t dex_method_index_;
   const InvokeType original_invoke_type_;
   Intrinsics intrinsic_;
-  IntrinsicNeedsEnvironmentOrCache needs_environment_or_cache_;
+
+  // A magic word holding optimizations for intrinsics. See intrinsics.h.
+  uint32_t intrinsic_optimizations_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -3259,10 +3268,7 @@
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
-  bool NeedsDexCache() const OVERRIDE {
-    if (intrinsic_ != Intrinsics::kNone) { return needs_environment_or_cache_; }
-    return !IsRecursive() && !IsStringInit();
-  }
+  bool NeedsDexCache() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
   uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
@@ -3921,24 +3927,31 @@
 // the calling convention.
 class HParameterValue : public HExpression<0> {
  public:
-  HParameterValue(uint8_t index,
+  HParameterValue(const DexFile& dex_file,
+                  uint16_t type_index,
+                  uint8_t index,
                   Primitive::Type parameter_type,
                   bool is_this = false)
       : HExpression(parameter_type, SideEffects::None(), kNoDexPc),
+        dex_file_(dex_file),
+        type_index_(type_index),
         index_(index),
         is_this_(is_this),
         can_be_null_(!is_this) {}
 
+  const DexFile& GetDexFile() const { return dex_file_; }
+  uint16_t GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
+  bool IsThis() const { return is_this_; }
 
   bool CanBeNull() const OVERRIDE { return can_be_null_; }
   void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; }
 
-  bool IsThis() const { return is_this_; }
-
   DECLARE_INSTRUCTION(ParameterValue);
 
  private:
+  const DexFile& dex_file_;
+  const uint16_t type_index_;
   // The index of this parameter in the parameters list. Must be less
   // than HGraph::number_of_in_vregs_.
   const uint8_t index_;
@@ -4125,12 +4138,10 @@
 
  protected:
   const HUserRecord<HInstruction*> InputRecordAt(size_t index) const OVERRIDE {
-    DCHECK_LE(index, InputCount());
     return inputs_[index];
   }
 
   void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
-    DCHECK_LE(index, InputCount());
     inputs_[index] = input;
   }
 
@@ -4210,7 +4221,7 @@
                     uint32_t field_idx,
                     const DexFile& dex_file,
                     Handle<mirror::DexCache> dex_cache,
-                    uint32_t dex_pc = kNoDexPc)
+                    uint32_t dex_pc)
       : HExpression(
             field_type,
             SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
@@ -4256,7 +4267,7 @@
                     uint32_t field_idx,
                     const DexFile& dex_file,
                     Handle<mirror::DexCache> dex_cache,
-                    uint32_t dex_pc = kNoDexPc)
+                    uint32_t dex_pc)
       : HTemplateInstruction(
           SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache),
@@ -4291,7 +4302,7 @@
   HArrayGet(HInstruction* array,
             HInstruction* index,
             Primitive::Type type,
-            uint32_t dex_pc = kNoDexPc)
+            uint32_t dex_pc)
       : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
@@ -4407,7 +4418,7 @@
 
 class HArrayLength : public HExpression<1> {
  public:
-  explicit HArrayLength(HInstruction* array, uint32_t dex_pc = kNoDexPc)
+  HArrayLength(HInstruction* array, uint32_t dex_pc)
       : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
     // Note that arrays do not change length, so the instruction does not
     // depend on any write.
@@ -4522,13 +4533,20 @@
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+    // The referrer's class should not need an access check. We never inline unverified
+    // methods so we can't possibly end up in this situation.
+    DCHECK(!is_referrers_class_ || !needs_access_check_);
     SetRawInputAt(0, current_method);
   }
 
   bool CanBeMoved() const OVERRIDE { return true; }
 
   bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
-    return other->AsLoadClass()->type_index_ == type_index_;
+    // Note that we don't need to test for generate_clinit_check_.
+    // Whether or not we need to generate the clinit check is processed in
+    // prepare_for_register_allocator based on existing HInvokes and HClinitChecks.
+    return other->AsLoadClass()->type_index_ == type_index_ &&
+        other->AsLoadClass()->needs_access_check_ == needs_access_check_;
   }
 
   size_t ComputeHashCode() const OVERRIDE { return type_index_; }
@@ -4540,7 +4558,7 @@
   bool NeedsEnvironment() const OVERRIDE {
     // Will call runtime and load the class if the class is not loaded yet.
     // TODO: finer grain decision.
-    return !is_referrers_class_ || needs_access_check_;
+    return !is_referrers_class_;
   }
 
   bool MustGenerateClinitCheck() const {
@@ -4675,7 +4693,7 @@
                   uint32_t field_idx,
                   const DexFile& dex_file,
                   Handle<mirror::DexCache> dex_cache,
-                  uint32_t dex_pc = kNoDexPc)
+                  uint32_t dex_pc)
       : HExpression(
             field_type,
             SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc),
@@ -4718,7 +4736,7 @@
                   uint32_t field_idx,
                   const DexFile& dex_file,
                   Handle<mirror::DexCache> dex_cache,
-                  uint32_t dex_pc = kNoDexPc)
+                  uint32_t dex_pc)
       : HTemplateInstruction(
           SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc),
         field_info_(field_offset, field_type, is_volatile, field_idx, dex_file, dex_cache),
@@ -5240,7 +5258,6 @@
   }
 
   MoveOperands* MoveOperandsAt(size_t index) {
-    DCHECK_LT(index, moves_.size());
     return &moves_[index];
   }
 
@@ -5314,7 +5331,7 @@
   explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
 
   bool Done() const { return index_ == graph_.GetBlocks().size(); }
-  HBasicBlock* Current() const { return graph_.GetBlock(index_); }
+  HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; }
   void Advance() { ++index_; }
 
  private:
@@ -5440,7 +5457,6 @@
       : blocks_in_loop_(info.GetBlocks()),
         blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()),
         index_(0) {
-    DCHECK(!blocks_.empty());
     if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) {
       Advance();
     }
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 8eeac56..764f5fe 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -34,7 +34,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
   entry->AddInstruction(new (&allocator) HGoto());
 
@@ -76,8 +77,10 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
-  HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
+  HInstruction* parameter2 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(parameter2);
   entry->AddInstruction(new (&allocator) HExit());
@@ -102,7 +105,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   ASSERT_FALSE(parameter->HasUses());
@@ -122,7 +126,8 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
   entry->AddInstruction(parameter1);
   entry->AddInstruction(with_environment);
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index ddc5730..f7cc872 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -23,7 +23,8 @@
 class HX86ComputeBaseMethodAddress : public HExpression<0> {
  public:
   // Treat the value as an int32_t, but it is really a 32 bit native pointer.
-  HX86ComputeBaseMethodAddress() : HExpression(Primitive::kPrimInt, SideEffects::None()) {}
+  HX86ComputeBaseMethodAddress()
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {}
 
   DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress);
 
@@ -37,7 +38,7 @@
   HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
                             HConstant* constant,
                             bool needs_materialization = true)
-      : HExpression(constant->GetType(), SideEffects::None()),
+      : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc),
         needs_materialization_(needs_materialization) {
     SetRawInputAt(0, method_base);
     SetRawInputAt(1, constant);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 5177b9a..c7f0806 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -31,6 +31,7 @@
 #include "base/arena_allocator.h"
 #include "base/arena_containers.h"
 #include "base/dumpable.h"
+#include "base/macros.h"
 #include "base/timing_logger.h"
 #include "boolean_simplifier.h"
 #include "bounds_check_elimination.h"
@@ -168,13 +169,13 @@
     if (kIsDebugBuild) {
       if (!graph_in_bad_state_) {
         if (graph_->IsInSsaForm()) {
-          SSAChecker checker(graph_->GetArena(), graph_);
+          SSAChecker checker(graph_);
           checker.Run();
           if (!checker.IsValid()) {
             LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<SSAChecker>(checker);
           }
         } else {
-          GraphChecker checker(graph_->GetArena(), graph_);
+          GraphChecker checker(graph_);
           checker.Run();
           if (!checker.IsValid()) {
             LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
@@ -357,7 +358,8 @@
 }
 
 static bool IsInstructionSetSupported(InstructionSet instruction_set) {
-  return instruction_set == kArm64
+  return (instruction_set == kArm && !kArm32QuickCodeUseSoftFloat)
+      || instruction_set == kArm64
       || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat)
       || instruction_set == kMips64
       || instruction_set == kX86
@@ -534,6 +536,7 @@
   return ArrayRef<const uint8_t>(vector);
 }
 
+NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
 static void AllocateRegisters(HGraph* graph,
                               CodeGenerator* codegen,
                               PassObserver* pass_observer) {
@@ -668,6 +671,7 @@
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+
   // Always use the thumb2 assembler: some runtime functionality (like implicit stack
   // overflow checks) assume thumb2.
   if (instruction_set == kArm) {
@@ -709,9 +713,6 @@
       &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
       kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
 
-  // For testing purposes, we put a special marker on method names that should be compiled
-  // with this compiler. This makes sure we're not regressing.
-  bool shouldCompile = method_name.find("$opt$") != std::string::npos;
   bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
 
   std::unique_ptr<CodeGenerator> codegen(
@@ -720,7 +721,6 @@
                             *compiler_driver->GetInstructionSetFeatures(),
                             compiler_driver->GetCompilerOptions()));
   if (codegen.get() == nullptr) {
-    CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
@@ -761,8 +761,6 @@
   {
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
     if (!builder.BuildGraph(*code_item)) {
-      DCHECK(!(IsCompilingWithCoreImage() && shouldCompile))
-          << "Could not build graph in optimizing compiler";
       pass_observer.SetGraphInBadState();
       return nullptr;
     }
@@ -855,6 +853,16 @@
     }
   }
 
+  if (kIsDebugBuild &&
+      IsCompilingWithCoreImage() &&
+      IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
+    // For testing purposes, we put a special marker on method names that should be compiled
+    // with this compiler. This makes sure we're not regressing.
+    std::string method_name = PrettyMethod(method_idx, dex_file);
+    bool shouldCompile = method_name.find("$opt$") != std::string::npos;
+    DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
+  }
+
   return method;
 }
 
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index fce7769..30bcf19 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -125,7 +125,6 @@
   // which means that a call to PerformMove could change any source operand
   // in the move graph.
 
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   if (move->IsRedundant()) {
@@ -406,7 +405,6 @@
   // we will update source operand in the move graph to reduce dependencies in
   // the graph.
 
-  DCHECK_LT(index, moves_.size());
   MoveOperands* move = moves_[index];
   DCHECK(!move->IsPending());
   DCHECK(!move->IsEliminated());
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index da91cb8..46e6f3e 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -56,7 +56,6 @@
       : ParallelMoveResolverWithSwap(allocator) {}
 
   void EmitMove(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
@@ -69,7 +68,6 @@
   }
 
   void EmitSwap(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
@@ -129,7 +127,6 @@
   void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
 
   void EmitMove(size_t index) OVERRIDE {
-    DCHECK_LT(index, moves_.size());
     MoveOperands* move = moves_[index];
     if (!message_.str().empty()) {
       message_ << " ";
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index 34850a5..429e6e3 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -131,7 +131,7 @@
     PrintString("  ");
     PrintInt(gota->GetId());
     PrintString(": Goto ");
-    PrintInt(current_block_->GetSuccessor(0)->GetBlockId());
+    PrintInt(current_block_->GetSuccessors()[0]->GetBlockId());
     PrintNewLine();
   }
 
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index f7a7e42..abfbcac 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -428,12 +428,21 @@
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
+static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::DexCache* dex_cache =
+      Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false);
+  // Get type from dex cache assuming it was populated by the verifier.
+  return dex_cache->GetResolvedType(type_idx);
+}
+
 void RTPVisitor::VisitParameterValue(HParameterValue* instr) {
   ScopedObjectAccess soa(Thread::Current());
   // We check if the existing type is valid: the inliner may have set it.
   if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
-    // TODO: parse the signature and add precise types for the parameters.
-    SetClassAsTypeInfo(instr, nullptr, /* is_exact */ false);
+    mirror::Class* resolved_class =
+        GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
+    SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false);
   }
 }
 
@@ -479,11 +488,9 @@
 
 void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache =
-      Runtime::Current()->GetClassLinker()->FindDexCache(soa.Self(), instr->GetDexFile(), false);
   // Get type from dex cache assuming it was populated by the verifier.
-  mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
-  // TODO: investigating why we are still getting unresolved classes: b/22821472.
+  mirror::Class* resolved_class =
+      GetClassFromDexCache(soa.Self(), instr->GetDexFile(), instr->GetTypeIndex());
   if (resolved_class != nullptr) {
     instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
         handles_->NewHandle(resolved_class), /* is_exact */ true));
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 9cdb89b..6fc7772 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -617,42 +617,40 @@
     // (2) Remove currently active intervals that are dead at this position.
     //     Move active intervals that have a lifetime hole at this position
     //     to inactive.
-    // Note: Copy elements we keep to the beginning, just like
-    //     v.erase(std::remove(v.begin(), v.end(), value), v.end());
-    auto active_kept_end = active_.begin();
-    for (auto it = active_.begin(), end = active_.end(); it != end; ++it) {
-      LiveInterval* interval = *it;
-      if (interval->IsDeadAt(position)) {
-        handled_.push_back(interval);
-      } else if (!interval->Covers(position)) {
-        inactive_.push_back(interval);
-      } else {
-        *active_kept_end++ = interval;  // Keep this interval.
-      }
-    }
-    // We have copied what we want to keep to [active_.begin(), active_kept_end),
-    // the rest of the data in active_ is junk - drop it.
+    auto active_kept_end = std::remove_if(
+        active_.begin(),
+        active_.end(),
+        [this, position](LiveInterval* interval) {
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (!interval->Covers(position)) {
+            inactive_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
     active_.erase(active_kept_end, active_.end());
 
     // (3) Remove currently inactive intervals that are dead at this position.
     //     Move inactive intervals that cover this position to active.
-    // Note: Copy elements we keep to the beginning, just like
-    //     v.erase(std::remove(v.begin(), v.begin() + num, value), v.begin() + num);
-    auto inactive_kept_end = inactive_.begin();
     auto inactive_to_handle_end = inactive_.begin() + inactive_intervals_to_handle;
-    for (auto it = inactive_.begin(); it != inactive_to_handle_end; ++it) {
-      LiveInterval* interval = *it;
-      DCHECK(interval->GetStart() < position || interval->IsFixed());
-      if (interval->IsDeadAt(position)) {
-        handled_.push_back(interval);
-      } else if (interval->Covers(position)) {
-        active_.push_back(interval);
-      } else {
-        *inactive_kept_end++ = interval;  // Keep this interval.
-      }
-    }
-    // We have copied what we want to keep to [inactive_.begin(), inactive_kept_end),
-    // the rest of the data in the processed interval is junk - drop it.
+    auto inactive_kept_end = std::remove_if(
+        inactive_.begin(),
+        inactive_to_handle_end,
+        [this, position](LiveInterval* interval) {
+          DCHECK(interval->GetStart() < position || interval->IsFixed());
+          if (interval->IsDeadAt(position)) {
+            handled_.push_back(interval);
+            return true;
+          } else if (interval->Covers(position)) {
+            active_.push_back(interval);
+            return true;
+          } else {
+            return false;  // Keep this interval.
+          }
+        });
     inactive_.erase(inactive_kept_end, inactive_to_handle_end);
 
     if (current->IsSlowPathSafepoint()) {
@@ -1894,7 +1892,7 @@
       for (HInstructionIterator inst_it(current->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
         HInstruction* phi = inst_it.Current();
         for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
-          HBasicBlock* predecessor = current->GetPredecessor(i);
+          HBasicBlock* predecessor = current->GetPredecessors()[i];
           DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
           HInstruction* input = phi->InputAt(i);
           Location source = input->GetLiveInterval()->GetLocationAt(
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 2bb5a8b..ed5419e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -312,7 +312,7 @@
   register_allocator.AllocateRegisters();
   ASSERT_TRUE(register_allocator.Validate(false));
 
-  HBasicBlock* loop_header = graph->GetBlock(2);
+  HBasicBlock* loop_header = graph->GetBlocks()[2];
   HPhi* phi = loop_header->GetFirstPhi()->AsPhi();
 
   LiveInterval* phi_interval = phi->GetLiveInterval();
@@ -321,7 +321,7 @@
   ASSERT_TRUE(loop_update->HasRegister());
   ASSERT_NE(phi_interval->GetRegister(), loop_update->GetRegister());
 
-  HBasicBlock* return_block = graph->GetBlock(3);
+  HBasicBlock* return_block = graph->GetBlocks()[3];
   HReturn* ret = return_block->GetLastInstruction()->AsReturn();
   ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister());
 }
@@ -343,8 +343,8 @@
   SsaLivenessAnalysis liveness(graph, &codegen);
   liveness.Analyze();
 
-  HXor* first_xor = graph->GetBlock(1)->GetFirstInstruction()->AsXor();
-  HXor* last_xor = graph->GetBlock(1)->GetLastInstruction()->GetPrevious()->AsXor();
+  HXor* first_xor = graph->GetBlocks()[1]->GetFirstInstruction()->AsXor();
+  HXor* last_xor = graph->GetBlocks()[1]->GetLastInstruction()->GetPrevious()->AsXor();
   ASSERT_EQ(last_xor->InputAt(0), first_xor);
   LiveInterval* interval = first_xor->GetLiveInterval();
   ASSERT_EQ(interval->GetEnd(), last_xor->GetLifetimePosition());
@@ -475,7 +475,8 @@
   NullHandle<mirror::DexCache> dex_cache;
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -488,7 +489,8 @@
                                                          false,
                                                          kUnknownFieldIndex,
                                                          graph->GetDexFile(),
-                                                         dex_cache);
+                                                         dex_cache,
+                                                         0);
   block->AddInstruction(test);
   block->AddInstruction(new (allocator) HIf(test));
   HBasicBlock* then = new (allocator) HBasicBlock(graph);
@@ -513,14 +515,16 @@
                                               false,
                                               kUnknownFieldIndex,
                                               graph->GetDexFile(),
-                                              dex_cache);
+                                              dex_cache,
+                                              0);
 *input2 = new (allocator) HInstanceFieldGet(parameter,
                                             Primitive::kPrimInt,
                                             MemberOffset(42),
                                             false,
                                             kUnknownFieldIndex,
                                             graph->GetDexFile(),
-                                            dex_cache);
+                                            dex_cache,
+                                            0);
   then->AddInstruction(*input1);
   else_->AddInstruction(*input2);
   join->AddInstruction(new (allocator) HExit());
@@ -621,7 +625,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimNot);
   entry->AddInstruction(parameter);
 
   HBasicBlock* block = new (allocator) HBasicBlock(graph);
@@ -634,7 +639,8 @@
                                              false,
                                              kUnknownFieldIndex,
                                              graph->GetDexFile(),
-                                             dex_cache);
+                                             dex_cache,
+                                             0);
   block->AddInstruction(*field);
   *ret = new (allocator) HReturn(*field);
   block->AddInstruction(*ret);
@@ -694,7 +700,8 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* parameter = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(parameter);
 
   HInstruction* constant1 = graph->GetIntConstant(1);
@@ -764,8 +771,10 @@
   HBasicBlock* entry = new (allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* first = new (allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* second = new (allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* first = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* second = new (allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(first);
   entry->AddInstruction(second);
 
@@ -816,10 +825,14 @@
   HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
   graph->AddBlock(entry);
   graph->SetEntryBlock(entry);
-  HInstruction* one = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* two = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* three = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
-  HInstruction* four = new (&allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* one = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* two = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* three = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  HInstruction* four = new (&allocator) HParameterValue(
+      graph->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry->AddInstruction(one);
   entry->AddInstruction(two);
   entry->AddInstruction(three);
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 338a3aa..1dc6986 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -76,18 +76,15 @@
 
 SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const {
   DCHECK(block->IsLoopHeader());
-  DCHECK_LT(block->GetBlockId(), loop_effects_.size());
   return loop_effects_[block->GetBlockId()];
 }
 
 SideEffects SideEffectsAnalysis::GetBlockEffects(HBasicBlock* block) const {
-  DCHECK_LT(block->GetBlockId(), block_effects_.size());
   return block_effects_[block->GetBlockId()];
 }
 
 void SideEffectsAnalysis::UpdateLoopEffects(HLoopInformation* info, SideEffects effects) {
   uint32_t id = info->GetHeader()->GetBlockId();
-  DCHECK_LT(id, loop_effects_.size());
   loop_effects_[id] = loop_effects_[id].Union(effects);
 }
 
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 40c75af..4565590 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -389,7 +389,6 @@
 }
 
 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
-  DCHECK_LT(block->GetBlockId(), locals_for_.size());
   ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
   const size_t vregs = GetGraph()->GetNumberOfVRegs();
   if (locals->empty() && vregs != 0u) {
@@ -417,7 +416,6 @@
 
 HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
   ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
-  DCHECK_LT(local, locals->size());
   return (*locals)[local];
 }
 
@@ -467,7 +465,7 @@
     for (size_t local = 0; local < current_locals_->size(); ++local) {
       bool one_predecessor_has_no_value = false;
       bool is_different = false;
-      HInstruction* value = ValueOfLocal(block->GetPredecessor(0), local);
+      HInstruction* value = ValueOfLocal(block->GetPredecessors()[0], local);
 
       for (HBasicBlock* predecessor : block->GetPredecessors()) {
         HInstruction* current = ValueOfLocal(predecessor, local);
@@ -489,7 +487,7 @@
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
             GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
         for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
-          HInstruction* pred_value = ValueOfLocal(block->GetPredecessor(i), local);
+          HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
           phi->SetRawInputAt(i, pred_value);
         }
         block->AddPhi(phi);
@@ -626,7 +624,6 @@
 }
 
 void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
-  DCHECK_LT(load->GetLocal()->GetRegNumber(), current_locals_->size());
   HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()];
   // If the operation requests a specific type, we make sure its input is of that type.
   if (load->GetType() != value->GetType()) {
@@ -641,7 +638,6 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  DCHECK_LT(store->GetLocal()->GetRegNumber(), current_locals_->size());
   (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
   store->GetBlock()->RemoveInstruction(store);
 }
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b869d57..b9d8731 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -159,7 +159,6 @@
 void SsaLivenessAnalysis::ComputeLiveness() {
   for (HLinearOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    DCHECK_LT(block->GetBlockId(), block_infos_.size());
     block_infos_[block->GetBlockId()] =
         new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_);
   }
@@ -388,14 +387,14 @@
         }
         // If the instruction dies at the phi assignment, we can try having the
         // same register.
-        if (end == user->GetBlock()->GetPredecessor(input_index)->GetLifetimeEnd()) {
+        if (end == user->GetBlock()->GetPredecessors()[input_index]->GetLifetimeEnd()) {
           for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
             if (i == input_index) {
               continue;
             }
             HInstruction* input = user->InputAt(i);
             Location location = input->GetLiveInterval()->GetLocationAt(
-                user->GetBlock()->GetPredecessor(i)->GetLifetimeEnd() - 1);
+                user->GetBlock()->GetPredecessors()[i]->GetLifetimeEnd() - 1);
             if (location.IsRegisterKind()) {
               int reg = RegisterOrLowRegister(location);
               if (free_until[reg] >= use_position) {
@@ -432,7 +431,6 @@
     const ArenaVector<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
     for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
       HInstruction* input = defined_by_->InputAt(i);
-      DCHECK_LT(i, predecessors.size());
       size_t end = predecessors[i]->GetLifetimeEnd();
       LiveInterval* input_interval = input->GetLiveInterval()->GetSiblingAt(end - 1);
       if (input_interval->GetEnd() == end) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index e4b0999..572a7b6 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1117,27 +1117,22 @@
   void Analyze();
 
   BitVector* GetLiveInSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->live_in_;
   }
 
   BitVector* GetLiveOutSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->live_out_;
   }
 
   BitVector* GetKillSet(const HBasicBlock& block) const {
-    DCHECK_LT(block.GetBlockId(), block_infos_.size());
     return &block_infos_[block.GetBlockId()]->kill_;
   }
 
   HInstruction* GetInstructionFromSsaIndex(size_t index) const {
-    DCHECK_LT(index, instructions_from_ssa_index_.size());
     return instructions_from_ssa_index_[index];
   }
 
   HInstruction* GetInstructionFromPosition(size_t index) const {
-    DCHECK_LT(index, instructions_from_lifetime_position_.size());
     return instructions_from_lifetime_position_[index];
   }
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index f27cecc..c60a4ea 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -24,6 +24,7 @@
                                         uint32_t num_dex_registers,
                                         uint8_t inlining_depth) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
+  DCHECK_NE(dex_pc, static_cast<uint32_t>(-1)) << "invalid dex_pc";
   current_entry_.dex_pc = dex_pc;
   current_entry_.native_pc_offset = native_pc_offset;
   current_entry_.register_mask = register_mask;
@@ -209,7 +210,6 @@
       // Entries with the same dex map will have the same offset.
     }
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
-      DCHECK_LT(inline_info_index, inline_infos_.size());
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers,
                                         inline_entry.live_dex_registers_mask);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 4783e28..fc27a2b 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -63,6 +63,7 @@
       : allocator_(allocator),
         stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)),
         location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
@@ -136,12 +137,10 @@
   }
 
   const StackMapEntry& GetStackMap(size_t i) const {
-    DCHECK_LT(i, stack_maps_.size());
     return stack_maps_[i];
   }
 
   void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
-    DCHECK_LT(i, stack_maps_.size());
     stack_maps_[i].native_pc_offset = native_pc_offset;
   }
 
@@ -175,8 +174,10 @@
   ArenaVector<DexRegisterLocation> location_catalog_entries_;
   // Map from Dex register location catalog entries to their indices in the
   // location catalog.
-  typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn,
-                  DexRegisterLocationHashFn> LocationCatalogEntriesIndices;
+  using LocationCatalogEntriesIndices = ArenaHashMap<DexRegisterLocation,
+                                                     size_t,
+                                                     LocationCatalogEntriesIndicesEmptyFn,
+                                                     DexRegisterLocationHashFn>;
   LocationCatalogEntriesIndices location_catalog_entries_indices_;
 
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index e745d94..b6c704c 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -36,7 +36,7 @@
   bool graph_built = builder.BuildGraph(*item);
   ASSERT_TRUE(graph_built);
 
-  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessor(0);
+  HBasicBlock* first_block = graph->GetEntryBlock()->GetSuccessors()[0];
   HInstruction* first_instruction = first_block->GetFirstInstruction();
   // Account for some tests having a store local as first instruction.
   ASSERT_TRUE(first_instruction->IsSuspendCheck()
diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h
index 48f0328..5c33639 100644
--- a/compiler/utils/array_ref.h
+++ b/compiler/utils/array_ref.h
@@ -77,15 +77,19 @@
       : array_(array_in), size_(size_in) {
   }
 
-  template <typename Alloc>
-  explicit ArrayRef(std::vector<T, Alloc>& v)
+  template <typename Vector,
+            typename = typename std::enable_if<
+                std::is_same<typename Vector::value_type, value_type>::value>::type>
+  explicit ArrayRef(Vector& v)
       : array_(v.data()), size_(v.size()) {
   }
 
-  template <typename U, typename Alloc>
-  explicit ArrayRef(const std::vector<U, Alloc>& v,
-                    typename std::enable_if<std::is_same<T, const U>::value, tag>::type
-                        t ATTRIBUTE_UNUSED = tag())
+  template <typename Vector,
+            typename = typename std::enable_if<
+                std::is_same<
+                    typename std::add_const<typename Vector::value_type>::type,
+                    value_type>::value>::type>
+  explicit ArrayRef(const Vector& v)
       : array_(v.data()), size_(v.size()) {
   }
 
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index c8b3fe5..8c71292 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -67,12 +67,20 @@
 
   // This is intended to be run as a test.
   bool CheckTools() {
-    if (!FileExists(FindTool(assembler_cmd_name_))) {
+    std::string asm_tool = FindTool(assembler_cmd_name_);
+    if (!FileExists(asm_tool)) {
+      LOG(ERROR) << "Could not find assembler from " << assembler_cmd_name_;
+      LOG(ERROR) << "FindTool returned " << asm_tool;
+      FindToolDump(assembler_cmd_name_);
       return false;
     }
     LOG(INFO) << "Chosen assembler command: " << GetAssemblerCommand();
 
-    if (!FileExists(FindTool(objdump_cmd_name_))) {
+    std::string objdump_tool = FindTool(objdump_cmd_name_);
+    if (!FileExists(objdump_tool)) {
+      LOG(ERROR) << "Could not find objdump from " << objdump_cmd_name_;
+      LOG(ERROR) << "FindTool returned " << objdump_tool;
+      FindToolDump(objdump_cmd_name_);
       return false;
     }
     LOG(INFO) << "Chosen objdump command: " << GetObjdumpCommand();
@@ -80,7 +88,11 @@
     // Disassembly is optional.
     std::string disassembler = GetDisassembleCommand();
     if (disassembler.length() != 0) {
-      if (!FileExists(FindTool(disassembler_cmd_name_))) {
+      std::string disassembler_tool = FindTool(disassembler_cmd_name_);
+      if (!FileExists(disassembler_tool)) {
+        LOG(ERROR) << "Could not find disassembler from " << disassembler_cmd_name_;
+        LOG(ERROR) << "FindTool returned " << disassembler_tool;
+        FindToolDump(disassembler_cmd_name_);
         return false;
       }
       LOG(INFO) << "Chosen disassemble command: " << GetDisassembleCommand();
@@ -493,7 +505,7 @@
     std::string error_msg;
     if (!Exec(args, &error_msg)) {
       EXPECT_TRUE(false) << error_msg;
-      return "";
+      UNREACHABLE();
     }
 
     std::ifstream in(tmp_file.c_str());
@@ -508,6 +520,54 @@
     return line;
   }
 
+  // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the
+  // "-name" option.
+  static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) {
+    std::string gcc_path = GetRootPath() + GetGCCRootPath();
+    std::vector<std::string> args;
+    args.push_back("find");
+    args.push_back(gcc_path);
+    if (!name_predicate.empty()) {
+      args.push_back("-name");
+      args.push_back(name_predicate);
+    }
+    args.push_back("|");
+    args.push_back("sort");
+    args.push_back(">");
+    args.push_back(tmp_file);
+    std::string sh_args = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(sh_args);
+
+    std::string error_msg;
+    if (!Exec(args, &error_msg)) {
+      EXPECT_TRUE(false) << error_msg;
+      UNREACHABLE();
+    }
+
+    LOG(ERROR) << "FindToolDump: gcc_path=" << gcc_path
+               << " cmd=" << sh_args;
+    std::ifstream in(tmp_file.c_str());
+    if (in) {
+      std::string line;
+      while (std::getline(in, line)) {
+        LOG(ERROR) << line;
+      }
+    }
+    in.close();
+    std::remove(tmp_file.c_str());
+  }
+
+  // For debug purposes.
+  void FindToolDump(std::string tool_name) {
+    // Check with the tool name.
+    FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam());
+    FindToolDumpPrintout("", GetTmpnam());
+  }
+
   // Use a consistent tmpnam, so store it.
   std::string GetTmpnam() {
     if (tmpnam_.length() == 0) {
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index e248604..b9d81a7 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -49,6 +49,7 @@
 #include "mirror/object_array-inl.h"
 #include "oat.h"
 #include "oat_file-inl.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "output_stream.h"
 #include "safe_map.h"
@@ -1563,13 +1564,15 @@
     }
     os << "\n";
 
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Runtime* const runtime = Runtime::Current();
+    ClassLinker* class_linker = runtime->GetClassLinker();
     std::string image_filename = image_space_.GetImageFilename();
     std::string oat_location = ImageHeader::GetOatLocationFromImageLocation(image_filename);
     os << "OAT LOCATION: " << oat_location;
     os << "\n";
     std::string error_msg;
-    const OatFile* oat_file = class_linker->FindOpenedOatFileFromOatLocation(oat_location);
+    const OatFile* oat_file = runtime->GetOatFileManager().FindOpenedOatFileFromOatLocation(
+        oat_location);
     if (oat_file == nullptr) {
       oat_file = OatFile::Open(oat_location, oat_location,
                                nullptr, nullptr, false, nullptr,
@@ -1594,7 +1597,7 @@
     os << "OBJECTS:\n" << std::flush;
 
     // Loop through all the image spaces and dump their objects.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
+    gc::Heap* heap = runtime->GetHeap();
     const std::vector<gc::space::ContinuousSpace*>& spaces = heap->GetContinuousSpaces();
     Thread* self = Thread::Current();
     {
@@ -2394,7 +2397,8 @@
   // Need to register dex files to get a working dex cache.
   ScopedObjectAccess soa(self);
   ClassLinker* class_linker = runtime->GetClassLinker();
-  class_linker->RegisterOatFile(oat_file);
+  Runtime::Current()->GetOatFileManager().RegisterOatFile(
+      std::unique_ptr<const OatFile>(oat_file));
   std::vector<const DexFile*> class_path;
   for (const OatFile::OatDexFile* odf : oat_file->GetOatDexFiles()) {
     std::string error_msg;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 8d81f2a..2eb5db1 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -154,6 +154,7 @@
   oat.cc \
   oat_file.cc \
   oat_file_assistant.cc \
+  oat_file_manager.cc \
   object_lock.cc \
   offsets.cc \
   os_linux.cc \
@@ -203,7 +204,6 @@
   arch/x86/registers_x86.cc \
   arch/x86_64/registers_x86_64.cc \
   entrypoints/entrypoint_utils.cc \
-  entrypoints/interpreter/interpreter_entrypoints.cc \
   entrypoints/jni/jni_entrypoints.cc \
   entrypoints/math_entrypoints.cc \
   entrypoints/quick/quick_alloc_entrypoints.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 1599025..76c7c4f 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -47,12 +46,7 @@
 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index e9c816f..371cbb2 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -30,12 +29,7 @@
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 1f2ce02..395cee8 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -30,7 +30,7 @@
 
   // Look for variants that need a fix for a53 erratum 835769.
   static const char* arm64_variants_with_a53_835769_bug[] = {
-      "default", "generic"  // Pessimistically assume all generic ARM64s are A53s.
+      "default", "generic", "cortex-a53"  // Pessimistically assume all generic ARM64s are A53s.
   };
   bool needs_a53_835769_fix = FindVariantInArray(arm64_variants_with_a53_835769_bug,
                                                  arraysize(arm64_variants_with_a53_835769_bug),
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 6721e54..59421dd 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -15,7 +15,6 @@
  */
 
 #include "atomic.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -59,12 +58,7 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 9f1f0e0..417d5fc 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -15,7 +15,6 @@
  */
 
 #include "atomic.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -57,12 +56,7 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index fe04bf5..ef5edbb 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -57,3 +57,190 @@
 GENERATE_ALLOC_ENTRYPOINTS _region_tlab, RegionTLAB
 GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented
 .endm
+
+// Generate the allocation entrypoints for each allocator. This is used as an alternative to
+// GENERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in
+// hand-written assembly.
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
+  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
+  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+.macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
+
+// This is to be separately defined for each architecture to allow a hand-written assembly fast path.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
+.endm
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 10fc281..019546f 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -31,12 +30,7 @@
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 3afc4d5..4a106e4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_x86.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
 // For x86, the CFA is esp+4, the address above the pushed return address on the stack.
 
     /*
@@ -760,7 +762,7 @@
     END_FUNCTION VAR(c_name)
 END_MACRO
 
-MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
     testl %eax, %eax               // eax == 0 ?
     jz  1f                         // if eax == 0 goto 1
     ret                            // return
@@ -785,195 +787,13 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
-// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
-// to macros and the VAR macro won't concatenate arguments properly), this also breaks having
-// multi-line macros that use each other (hence using 1 macro per newline below).
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
-  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
-  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
-
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
-
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 5cc72e3..eae09ee 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
@@ -32,16 +31,11 @@
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints) {
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
-  UNUSED(ipoints, jpoints, qpoints);
+  UNUSED(jpoints, qpoints);
   UNIMPLEMENTED(FATAL);
 #else
-  // Interpreter
-  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
-  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
-
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 1133203..5c413d2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -16,6 +16,8 @@
 
 #include "asm_support_x86_64.S"
 
+#include "arch/quick_alloc_entrypoints.S"
+
 MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
     // Create space for ART FP callee-saved registers
     subq MACRO_LITERAL(4 * 8), %rsp
@@ -780,7 +782,7 @@
     END_FUNCTION VAR(c_name)
 END_MACRO
 
-MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
     testq %rax, %rax               // rax == 0 ?
     jz  1f                         // if rax == 0 goto 1
     ret                            // return
@@ -806,113 +808,8 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
-// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
-// to macros and the VAR macro won't concatenate arguments properly), this also breaks having
-// multi-line macros that use each other (hence using 1 macro per newline below).
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
-  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
-  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
-  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
-
+GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
     // RDI: uint32_t type_idx, RSI: ArtMethod*
@@ -954,95 +851,18 @@
     movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
     ret                                                        // Fast path succeeded.
 .Lart_quick_alloc_object_tlab_slow_path:
-    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeTLAB)      // cxx_name(arg0, arg1, Thread*)
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO         // return or deliver exception
+    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeTLAB)                    // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_FUNCTION art_quick_alloc_object_tlab
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
-
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
-
-ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
-ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 
 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 1b569fe..d98fc51 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -109,7 +109,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 152 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 6f2aa46..1704688 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -93,6 +93,7 @@
   "StackMapStm  ",
   "CodeGen      ",
   "ParallelMove ",
+  "GraphChecker ",
 };
 
 template <bool kCount>
@@ -156,6 +157,18 @@
 // Explicitly instantiate the used implementation.
 template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>;
 
+void ArenaAllocatorMemoryTool::DoMakeDefined(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_DEFINED(ptr, size);
+}
+
+void ArenaAllocatorMemoryTool::DoMakeUndefined(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_UNDEFINED(ptr, size);
+}
+
+void ArenaAllocatorMemoryTool::DoMakeInaccessible(void* ptr, size_t size) {
+  MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
+}
+
 Arena::Arena() : bytes_allocated_(0), next_(nullptr) {
 }
 
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 565b416..004895a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -105,6 +105,7 @@
   kArenaAllocStackMapStream,
   kArenaAllocCodeGenerator,
   kArenaAllocParallelMoveResolver,
+  kArenaAllocGraphChecker,
   kNumArenaAllocKinds
 };
 
@@ -180,20 +181,25 @@
   using ArenaAllocatorMemoryToolCheck::IsRunningOnMemoryTool;
 
   void MakeDefined(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_DEFINED(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeDefined(ptr, size);
     }
   }
   void MakeUndefined(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_UNDEFINED(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeUndefined(ptr, size);
     }
   }
   void MakeInaccessible(void* ptr, size_t size) {
-    if (IsRunningOnMemoryTool()) {
-      MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      DoMakeInaccessible(ptr, size);
     }
   }
+
+ private:
+  void DoMakeDefined(void* ptr, size_t size);
+  void DoMakeUndefined(void* ptr, size_t size);
+  void DoMakeInaccessible(void* ptr, size_t size);
 };
 
 class Arena {
diff --git a/runtime/base/arena_containers.h b/runtime/base/arena_containers.h
index e7ea09d..9174d2d 100644
--- a/runtime/base/arena_containers.h
+++ b/runtime/base/arena_containers.h
@@ -20,9 +20,12 @@
 #include <deque>
 #include <queue>
 #include <set>
-#include <vector>
+#include <utility>
 
 #include "arena_allocator.h"
+#include "base/dchecked_vector.h"
+#include "hash_map.h"
+#include "hash_set.h"
 #include "safe_map.h"
 
 namespace art {
@@ -48,7 +51,7 @@
 using ArenaQueue = std::queue<T, ArenaDeque<T>>;
 
 template <typename T>
-using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>;
+using ArenaVector = dchecked_vector<T, ArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
 using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>;
@@ -57,6 +60,24 @@
 using ArenaSafeMap =
     SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>;
 
+template <typename T,
+          typename EmptyFn = DefaultEmptyFn<T>,
+          typename HashFn = std::hash<T>,
+          typename Pred = std::equal_to<T>>
+using ArenaHashSet = HashSet<T, EmptyFn, HashFn, Pred, ArenaAllocatorAdapter<T>>;
+
+template <typename Key,
+          typename Value,
+          typename EmptyFn = DefaultEmptyFn<std::pair<Key, Value>>,
+          typename HashFn = std::hash<Key>,
+          typename Pred = std::equal_to<Key>>
+using ArenaHashMap = HashMap<Key,
+                             Value,
+                             EmptyFn,
+                             HashFn,
+                             Pred,
+                             ArenaAllocatorAdapter<std::pair<Key, Value>>>;
+
 // Implementation details below.
 
 template <bool kCount>
@@ -164,11 +185,13 @@
     arena_allocator_->MakeInaccessible(p, sizeof(T) * n);
   }
 
-  void construct(pointer p, const_reference val) {
-    new (static_cast<void*>(p)) value_type(val);
+  template <typename U, typename... Args>
+  void construct(U* p, Args&&... args) {
+    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
   }
-  void destroy(pointer p) {
-    p->~value_type();
+  template <typename U>
+  void destroy(U* p) {
+    p->~U();
   }
 
  private:
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index cfd3d24..5e97a63 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -312,10 +312,6 @@
   }
 }
 
-#if defined(__clang__) && defined(__ARM_64BIT_STATE)
-// b/19180814 When POPCOUNT is inlined, boot up failed on arm64 devices.
-__attribute__((optnone))
-#endif
 uint32_t BitVector::NumSetBits(const uint32_t* storage, uint32_t end) {
   uint32_t word_end = WordIndex(end);
   uint32_t partial_word_bits = end & 0x1f;
diff --git a/runtime/base/dchecked_vector.h b/runtime/base/dchecked_vector.h
new file mode 100644
index 0000000..6ec573a
--- /dev/null
+++ b/runtime/base/dchecked_vector.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
+#define ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
+
+#include <algorithm>
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace art {
+
+// Template class serving as a replacement for std::vector<> but adding
+// DCHECK()s for the subscript operator, front(), back(), pop_back(),
+// and for insert()/emplace()/erase() positions.
+//
+// Note: The element accessor at() is specified as throwing std::out_of_range
+// but we do not use exceptions, so this accessor is deliberately hidden.
+// Note: The common pattern &v[0] used to retrieve pointer to the data is not
+// valid for an empty dchecked_vector<>. Use data() to avoid checking empty().
+template <typename T, typename Alloc>
+class dchecked_vector : private std::vector<T, Alloc> {
+ private:
+  // std::vector<> has a slightly different specialization for bool. We don't provide that.
+  static_assert(!std::is_same<T, bool>::value, "Not implemented for bool.");
+  using Base = std::vector<T, Alloc>;
+
+ public:
+  using typename Base::value_type;
+  using typename Base::allocator_type;
+  using typename Base::reference;
+  using typename Base::const_reference;
+  using typename Base::pointer;
+  using typename Base::const_pointer;
+  using typename Base::iterator;
+  using typename Base::const_iterator;
+  using typename Base::reverse_iterator;
+  using typename Base::const_reverse_iterator;
+  using typename Base::size_type;
+  using typename Base::difference_type;
+
+  // Construct/copy/destroy.
+  dchecked_vector()
+      : Base() { }
+  explicit dchecked_vector(const allocator_type& alloc)
+      : Base(alloc) { }
+  explicit dchecked_vector(size_type n, const allocator_type& alloc = allocator_type())
+      : Base(n, alloc) { }
+  dchecked_vector(size_type n,
+                  const value_type& value,
+                  const allocator_type& alloc = allocator_type())
+      : Base(n, value, alloc) { }
+  template <typename InputIterator>
+  dchecked_vector(InputIterator first,
+                  InputIterator last,
+                  const allocator_type& alloc = allocator_type())
+      : Base(first, last, alloc) { }
+  dchecked_vector(const dchecked_vector& src)
+      : Base(src) { }
+  dchecked_vector(const dchecked_vector& src, const allocator_type& alloc)
+      : Base(src, alloc) { }
+  dchecked_vector(dchecked_vector&& src)
+      : Base(std::move(src)) { }
+  dchecked_vector(dchecked_vector&& src, const allocator_type& alloc)
+      : Base(std::move(src), alloc) { }
+  dchecked_vector(std::initializer_list<value_type> il,
+                  const allocator_type& alloc = allocator_type())
+      : Base(il, alloc) { }
+  ~dchecked_vector() = default;
+  dchecked_vector& operator=(const dchecked_vector& src) {
+    Base::operator=(src);
+    return *this;
+  }
+  dchecked_vector& operator=(dchecked_vector&& src) {
+    Base::operator=(std::move(src));
+    return *this;
+  }
+  dchecked_vector& operator=(std::initializer_list<value_type> il) {
+    Base::operator=(il);
+    return *this;
+  }
+
+  // Iterators.
+  using Base::begin;
+  using Base::end;
+  using Base::rbegin;
+  using Base::rend;
+  using Base::cbegin;
+  using Base::cend;
+  using Base::crbegin;
+  using Base::crend;
+
+  // Capacity.
+  using Base::size;
+  using Base::max_size;
+  using Base::resize;
+  using Base::capacity;
+  using Base::empty;
+  using Base::reserve;
+  using Base::shrink_to_fit;
+
+  // Element access: inherited.
+  // Note: Deliberately not providing at().
+  using Base::data;
+
+  // Element access: subscript operator. Check index.
+  reference operator[](size_type n) {
+    DCHECK_LT(n, size());
+    return Base::operator[](n);
+  }
+  const_reference operator[](size_type n) const {
+    DCHECK_LT(n, size());
+    return Base::operator[](n);
+  }
+
+  // Element access: front(), back(). Check not empty.
+  reference front() { DCHECK(!empty()); return Base::front(); }
+  const_reference front() const { DCHECK(!empty()); return Base::front(); }
+  reference back() { DCHECK(!empty()); return Base::back(); }
+  const_reference back() const { DCHECK(!empty()); return Base::back(); }
+
+  // Modifiers: inherited.
+  using Base::assign;
+  using Base::push_back;
+  using Base::clear;
+  using Base::emplace_back;
+
+  // Modifiers: pop_back(). Check not empty.
+  void pop_back() { DCHECK(!empty()); Base::pop_back(); }
+
+  // Modifiers: swap(). Swap only with another dchecked_vector instead of a plain vector.
+  void swap(dchecked_vector& other) { Base::swap(other); }
+
+  // Modifiers: insert(). Check position.
+  iterator insert(const_iterator position, const value_type& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, value);
+  }
+  iterator insert(const_iterator position, size_type n, const value_type& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, n, value);
+  }
+  template <typename InputIterator>
+  iterator insert(const_iterator position, InputIterator first, InputIterator last) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, first, last);
+  }
+  iterator insert(const_iterator position, value_type&& value) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, std::move(value));
+  }
+  iterator insert(const_iterator position, std::initializer_list<value_type> il) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::insert(position, il);
+  }
+
+  // Modifiers: erase(). Check position.
+  iterator erase(const_iterator position) {
+    DCHECK(cbegin() <= position && position < cend());
+    return Base::erase(position);
+  }
+  iterator erase(const_iterator first, const_iterator last) {
+    DCHECK(cbegin() <= first && first <= cend());
+    DCHECK(first <= last && last <= cend());
+    return Base::erase(first, last);
+  }
+
+  // Modifiers: emplace(). Check position; returns an iterator to the emplaced element.
+  template <typename... Args>
+  iterator emplace(const_iterator position, Args&&... args) {
+    DCHECK(cbegin() <= position && position <= cend());
+    return Base::emplace(position, std::forward<Args>(args)...);
+  }
+
+  // Allocator.
+  using Base::get_allocator;
+};
+
+// Non-member swap(), found by argument-dependent lookup for an unqualified call.
+template <typename T, typename Alloc>
+void swap(dchecked_vector<T, Alloc>& lhs, dchecked_vector<T, Alloc>& rhs) {
+  lhs.swap(rhs);
+}
+
+// Non-member relational operators.
+template <typename T, typename Alloc>
+bool operator==(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin());
+}
+template <typename T, typename Alloc>
+bool operator!=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(lhs == rhs);
+}
+template <typename T, typename Alloc>
+bool operator<(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+template <typename T, typename Alloc>
+bool operator<=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(rhs < lhs);
+}
+template <typename T, typename Alloc>
+bool operator>(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return rhs < lhs;
+}
+template <typename T, typename Alloc>
+bool operator>=(const dchecked_vector<T, Alloc>& lhs, const dchecked_vector<T, Alloc>& rhs) {
+  return !(lhs < rhs);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_DCHECKED_VECTOR_H_
diff --git a/runtime/base/hash_map.h b/runtime/base/hash_map.h
index eab80ff..b18d586 100644
--- a/runtime/base/hash_map.h
+++ b/runtime/base/hash_map.h
@@ -51,8 +51,22 @@
 template <class Key, class Value, class EmptyFn,
     class HashFn = std::hash<Key>, class Pred = std::equal_to<Key>,
     class Alloc = std::allocator<std::pair<Key, Value>>>
-class HashMap : public HashSet<std::pair<Key, Value>, EmptyFn, HashMapWrapper<HashFn>,
-                               HashMapWrapper<Pred>, Alloc> {
+class HashMap : public HashSet<std::pair<Key, Value>,
+                               EmptyFn,
+                               HashMapWrapper<HashFn>,
+                               HashMapWrapper<Pred>,
+                               Alloc> {
+ private:
+  using Base = HashSet<std::pair<Key, Value>,  // Shorthand for the base class, used by the ctors.
+                       EmptyFn,
+                       HashMapWrapper<HashFn>,
+                       HashMapWrapper<Pred>,
+                       Alloc>;
+
+ public:
+  HashMap() : Base() { }  // Default-constructs the underlying HashSet.
+  explicit HashMap(const Alloc& alloc)
+      : Base(alloc) { }  // Passes the allocator through to the underlying HashSet.
 };
 
 }  // namespace art
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index d110fe3..f2b1cc0 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_BASE_HASH_SET_H_
 
 #include <functional>
+#include <iterator>
 #include <memory>
 #include <stdint.h>
 #include <utility>
@@ -45,7 +46,7 @@
   void MakeEmpty(T*& item) const {
     item = nullptr;
   }
-  bool IsEmpty(const T*& item) const {
+  bool IsEmpty(T* const& item) const {
     return item == nullptr;
   }
 };
@@ -59,7 +60,7 @@
     class Pred = std::equal_to<T>, class Alloc = std::allocator<T>>
 class HashSet {
   template <class Elem, class HashSetType>
-  class BaseIterator {
+  class BaseIterator : std::iterator<std::forward_iterator_tag, Elem> {
    public:
     BaseIterator(const BaseIterator&) = default;
     BaseIterator(BaseIterator&&) = default;
@@ -82,7 +83,7 @@
     }
 
     BaseIterator operator++(int) {
-      Iterator temp = *this;
+      BaseIterator temp = *this;
       this->index_ = this->NextNonEmptySlot(this->index_, hash_set_);
       return temp;
     }
@@ -96,7 +97,7 @@
       return &**this;
     }
 
-    // TODO: Operator -- --(int)
+    // TODO: Operator -- --(int)  (and use std::bidirectional_iterator_tag)
 
    private:
     size_t index_;
@@ -115,34 +116,87 @@
   };
 
  public:
+  using value_type = T;
+  using allocator_type = Alloc;
+  using reference = T&;
+  using const_reference = const T&;
+  using pointer = T*;
+  using const_pointer = const T*;
+  using iterator = BaseIterator<T, HashSet>;
+  using const_iterator = BaseIterator<const T, const HashSet>;
+  using size_type = size_t;
+  using difference_type = ptrdiff_t;
+
   static constexpr double kDefaultMinLoadFactor = 0.5;
   static constexpr double kDefaultMaxLoadFactor = 0.9;
   static constexpr size_t kMinBuckets = 1000;
 
-  typedef BaseIterator<T, HashSet> Iterator;
-  typedef BaseIterator<const T, const HashSet> ConstIterator;
-
   // If we don't own the data, this will create a new array which owns the data.
   void Clear() {
     DeallocateStorage();
-    AllocateStorage(1);
     num_elements_ = 0;
     elements_until_expand_ = 0;
   }
 
-  HashSet() : num_elements_(0), num_buckets_(0), owns_data_(false), data_(nullptr),
-      min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) {
-    Clear();
+  HashSet()  // Creates an empty set with zero buckets; no storage is allocated here.
+      : num_elements_(0u),
+        num_buckets_(0u),
+        elements_until_expand_(0u),
+        owns_data_(false),
+        data_(nullptr),  // No backing array yet.
+        min_load_factor_(kDefaultMinLoadFactor),
+        max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
-      data_(nullptr) {
-    *this = other;
+  explicit HashSet(const allocator_type& alloc)  // Empty set that uses a copy of `alloc`.
+      : allocfn_(alloc),
+        hashfn_(),
+        emptyfn_(),
+        pred_(),
+        num_elements_(0u),
+        num_buckets_(0u),
+        elements_until_expand_(0u),
+        owns_data_(false),
+        data_(nullptr),  // No backing array yet.
+        min_load_factor_(kDefaultMinLoadFactor),
+        max_load_factor_(kDefaultMaxLoadFactor) {
   }
 
-  HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), owns_data_(false),
-      data_(nullptr) {
-    *this = std::move(other);
+  HashSet(const HashSet& other)  // Copies functors and settings, then every bucket of other.
+      : allocfn_(other.allocfn_),
+        hashfn_(other.hashfn_),
+        emptyfn_(other.emptyfn_),
+        pred_(other.pred_),
+        num_elements_(other.num_elements_),
+        num_buckets_(0u),  // Presumably set by AllocateStorage() below.
+        elements_until_expand_(other.elements_until_expand_),
+        owns_data_(false),
+        data_(nullptr),
+        min_load_factor_(other.min_load_factor_),
+        max_load_factor_(other.max_load_factor_) {
+    AllocateStorage(other.NumBuckets());
+    for (size_t i = 0; i < num_buckets_; ++i) {  // Every slot is copied, empty ones included.
+      ElementForIndex(i) = other.data_[i];
+    }
+  }
+
+  HashSet(HashSet&& other)  // Takes over other's data_ pointer and bookkeeping.
+      : allocfn_(std::move(other.allocfn_)),
+        hashfn_(std::move(other.hashfn_)),
+        emptyfn_(std::move(other.emptyfn_)),
+        pred_(std::move(other.pred_)),
+        num_elements_(other.num_elements_),
+        num_buckets_(other.num_buckets_),
+        elements_until_expand_(other.elements_until_expand_),
+        owns_data_(other.owns_data_),
+        data_(other.data_),
+        min_load_factor_(other.min_load_factor_),
+        max_load_factor_(other.max_load_factor_) {
+    other.num_elements_ = 0u;  // Reset other so it no longer refers to the moved storage.
+    other.num_buckets_ = 0u;
+    other.elements_until_expand_ = 0u;
+    other.owns_data_ = false;
+    other.data_ = nullptr;
   }
 
   // Construct from existing data.
@@ -199,32 +253,18 @@
   }
 
   HashSet& operator=(HashSet&& other) {
-    std::swap(data_, other.data_);
-    std::swap(num_buckets_, other.num_buckets_);
-    std::swap(num_elements_, other.num_elements_);
-    std::swap(elements_until_expand_, other.elements_until_expand_);
-    std::swap(min_load_factor_, other.min_load_factor_);
-    std::swap(max_load_factor_, other.max_load_factor_);
-    std::swap(owns_data_, other.owns_data_);
+    HashSet(std::move(other)).swap(*this);  // Move into a temporary, then swap it into *this.
     return *this;
   }
 
   HashSet& operator=(const HashSet& other) {
-    DeallocateStorage();
-    AllocateStorage(other.NumBuckets());
-    for (size_t i = 0; i < num_buckets_; ++i) {
-      ElementForIndex(i) = other.data_[i];
-    }
-    num_elements_ = other.num_elements_;
-    elements_until_expand_ = other.elements_until_expand_;
-    min_load_factor_ = other.min_load_factor_;
-    max_load_factor_ = other.max_load_factor_;
+    HashSet(other).swap(*this);  // NOLINT(runtime/explicit) - a case of lint gone mad.
     return *this;
   }
 
   // Lower case for c++11 for each.
-  Iterator begin() {
-    Iterator ret(this, 0);
+  iterator begin() {
+    iterator ret(this, 0);
     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
       ++ret;  // Skip all the empty slots.
     }
@@ -232,8 +272,8 @@
   }
 
   // Lower case for c++11 for each. const version.
-  ConstIterator begin() const {
-    ConstIterator ret(this, 0);
+  const_iterator begin() const {
+    const_iterator ret(this, 0);
     if (num_buckets_ != 0 && IsFreeSlot(ret.index_)) {
       ++ret;  // Skip all the empty slots.
     }
@@ -241,13 +281,13 @@
   }
 
   // Lower case for c++11 for each.
-  Iterator end() {
-    return Iterator(this, NumBuckets());
+  iterator end() {
+    return iterator(this, NumBuckets());
   }
 
   // Lower case for c++11 for each. const version.
-  ConstIterator end() const {
-    return ConstIterator(this, NumBuckets());
+  const_iterator end() const {
+    return const_iterator(this, NumBuckets());
   }
 
   bool Empty() {
@@ -262,7 +302,7 @@
   // and set the empty slot to be the location we just moved from.
   // Relies on maintaining the invariant that there's no empty slots from the 'ideal' index of an
   // element to its actual location/index.
-  Iterator Erase(Iterator it) {
+  iterator Erase(iterator it) {
     // empty_index is the index that will become empty.
     size_t empty_index = it.index_;
     DCHECK(!IsFreeSlot(empty_index));
@@ -313,23 +353,23 @@
   // Set of Class* sorted by name, want to find a class with a name but can't allocate a dummy
   // object in the heap for performance solution.
   template <typename K>
-  Iterator Find(const K& key) {
+  iterator Find(const K& key) {
     return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  ConstIterator Find(const K& key) const {
+  const_iterator Find(const K& key) const {
     return FindWithHash(key, hashfn_(key));
   }
 
   template <typename K>
-  Iterator FindWithHash(const K& key, size_t hash) {
-    return Iterator(this, FindIndex(key, hash));
+  iterator FindWithHash(const K& key, size_t hash) {
+    return iterator(this, FindIndex(key, hash));
   }
 
   template <typename K>
-  ConstIterator FindWithHash(const K& key, size_t hash) const {
-    return ConstIterator(this, FindIndex(key, hash));
+  const_iterator FindWithHash(const K& key, size_t hash) const {
+    return const_iterator(this, FindIndex(key, hash));
   }
 
   // Insert an element, allows duplicates.
@@ -352,6 +392,26 @@
     return num_elements_;
   }
 
+  void swap(HashSet& other) {
+    // Plain members are exchanged with std::swap(), in the constructors' initialization order.
+    std::swap(num_elements_, other.num_elements_);
+    std::swap(num_buckets_, other.num_buckets_);
+    std::swap(elements_until_expand_, other.elements_until_expand_);
+    std::swap(owns_data_, other.owns_data_);
+    std::swap(data_, other.data_);
+    std::swap(min_load_factor_, other.min_load_factor_);
+    std::swap(max_load_factor_, other.max_load_factor_);
+    using std::swap;  // Function objects: ADL swap() with std::swap() as fall-back.
+    swap(allocfn_, other.allocfn_);
+    swap(hashfn_, other.hashfn_);
+    swap(emptyfn_, other.emptyfn_);
+    swap(pred_, other.pred_);
+  }
+
+  allocator_type get_allocator() const {  // Returns a copy of the stored allocator, allocfn_.
+    return allocfn_;
+  }
+
   void ShrinkToMaximumLoad() {
     Resize(Size() / max_load_factor_);
   }
@@ -429,7 +489,7 @@
   }
 
   // Find the hash table slot for an element, or return NumBuckets() if not found.
-  // This value for not found is important so that Iterator(this, FindIndex(...)) == end().
+  // This value for not found is important so that iterator(this, FindIndex(...)) == end().
   template <typename K>
   size_t FindIndex(const K& element, size_t hash) const {
     // Guard against failing to get an element for a non-existing index.
@@ -560,6 +620,12 @@
   double max_load_factor_;
 };
 
+template <class T, class EmptyFn, class HashFn, class Pred, class Alloc>
+void swap(HashSet<T, EmptyFn, HashFn, Pred, Alloc>& lhs,
+          HashSet<T, EmptyFn, HashFn, Pred, Alloc>& rhs) {
+  lhs.swap(rhs);  // Non-member overload; simply delegates to the member swap().
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_HASH_SET_H_
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index b2c5677..30bfb4a 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -50,6 +50,7 @@
 Mutex* Locks::modify_ldt_lock_ = nullptr;
 MutatorMutex* Locks::mutator_lock_ = nullptr;
 Mutex* Locks::profiler_lock_ = nullptr;
+ReaderWriterMutex* Locks::oat_file_manager_lock_ = nullptr;
 Mutex* Locks::reference_processor_lock_ = nullptr;
 Mutex* Locks::reference_queue_cleared_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_finalizer_references_lock_ = nullptr;
@@ -940,6 +941,7 @@
     DCHECK(classlinker_classes_lock_ != nullptr);
     DCHECK(deoptimization_lock_ != nullptr);
     DCHECK(heap_bitmap_lock_ != nullptr);
+    DCHECK(oat_file_manager_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -1028,6 +1030,10 @@
       modify_ldt_lock_ = new Mutex("modify_ldt lock", current_lock_level);
     }
 
+    UPDATE_CURRENT_LOCK_LEVEL(kOatFileManagerLock);
+    DCHECK(oat_file_manager_lock_ == nullptr);
+    oat_file_manager_lock_ = new ReaderWriterMutex("OatFile manager lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kInternTableLock);
     DCHECK(intern_table_lock_ == nullptr);
     intern_table_lock_ = new Mutex("InternTable lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3da806b..17f6a03 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -83,6 +83,7 @@
   kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
+  kOatFileManagerLock,
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
   kDefaultMutexLevel,
@@ -644,8 +645,11 @@
   // Guards modification of the LDT on x86.
   static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
 
+  // Guards opened oat files in OatFileManager.
+  static ReaderWriterMutex* oat_file_manager_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+
   // Guards intern table.
-  static Mutex* intern_table_lock_ ACQUIRED_AFTER(modify_ldt_lock_);
+  static Mutex* intern_table_lock_ ACQUIRED_AFTER(oat_file_manager_lock_);
 
   // Guards reference processor.
   static Mutex* reference_processor_lock_ ACQUIRED_AFTER(intern_table_lock_);
diff --git a/runtime/base/scoped_arena_containers.h b/runtime/base/scoped_arena_containers.h
index eecc55f..7c64449 100644
--- a/runtime/base/scoped_arena_containers.h
+++ b/runtime/base/scoped_arena_containers.h
@@ -21,9 +21,10 @@
 #include <queue>
 #include <set>
 #include <unordered_map>
-#include <vector>
+#include <utility>
 
 #include "arena_containers.h"  // For ArenaAllocatorAdapterKind.
+#include "base/dchecked_vector.h"
 #include "scoped_arena_allocator.h"
 #include "safe_map.h"
 
@@ -47,7 +48,7 @@
 using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>;
 
 template <typename T>
-using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
+using ScopedArenaVector = dchecked_vector<T, ScopedArenaAllocatorAdapter<T>>;
 
 template <typename T, typename Comparator = std::less<T>>
 using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>;
@@ -157,13 +158,15 @@
     arena_stack_->MakeInaccessible(p, sizeof(T) * n);
   }
 
-  void construct(pointer p, const_reference val) {
+  template <typename U, typename... Args>
+  void construct(U* p, Args&&... args) {  // Constructs a U at p from perfectly forwarded args.
     // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
-    new (static_cast<void*>(p)) value_type(val);
+    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
   }
-  void destroy(pointer p) {
+  template <typename U>
+  void destroy(U* p) {  // Runs U's destructor on *p; storage is not released here.
     // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
-    p->~value_type();
+    p->~U();
   }
 
  private:
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index acb39c5..9349fe3 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -58,6 +58,7 @@
 #include "oat_file.h"
 #include "oat_file-inl.h"
 #include "oat_file_assistant.h"
+#include "oat_file_manager.h"
 #include "object_lock.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -89,9 +90,6 @@
 
 static constexpr bool kSanityCheckObjects = kIsDebugBuild;
 
-// For b/21333911.
-static constexpr bool kDuplicateClassesCheck = false;
-
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_REQUIRES(Locks::mutator_lock_);
@@ -696,343 +694,6 @@
   }
 }
 
-const OatFile* ClassLinker::RegisterOatFile(const OatFile* oat_file) {
-  WriterMutexLock mu(Thread::Current(), dex_lock_);
-  if (kIsDebugBuild) {
-    for (size_t i = 0; i < oat_files_.size(); ++i) {
-      CHECK_NE(oat_file, oat_files_[i]) << oat_file->GetLocation();
-    }
-  }
-  VLOG(class_linker) << "Registering " << oat_file->GetLocation();
-  oat_files_.push_back(oat_file);
-  return oat_file;
-}
-
-OatFile& ClassLinker::GetImageOatFile(gc::space::ImageSpace* space) {
-  VLOG(startup) << "ClassLinker::GetImageOatFile entering";
-  OatFile* oat_file = space->ReleaseOatFile();
-  CHECK_EQ(RegisterOatFile(oat_file), oat_file);
-  VLOG(startup) << "ClassLinker::GetImageOatFile exiting";
-  return *oat_file;
-}
-
-class DexFileAndClassPair : ValueObject {
- public:
-  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
-     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
-       dex_file_(dex_file),
-       current_class_index_(current_class_index),
-       from_loaded_oat_(from_loaded_oat) {}
-
-  DexFileAndClassPair(const DexFileAndClassPair&) = default;
-
-  DexFileAndClassPair& operator=(const DexFileAndClassPair& rhs) {
-    cached_descriptor_ = rhs.cached_descriptor_;
-    dex_file_ = rhs.dex_file_;
-    current_class_index_ = rhs.current_class_index_;
-    from_loaded_oat_ = rhs.from_loaded_oat_;
-    return *this;
-  }
-
-  const char* GetCachedDescriptor() const {
-    return cached_descriptor_;
-  }
-
-  bool operator<(const DexFileAndClassPair& rhs) const {
-    const char* lhsDescriptor = cached_descriptor_;
-    const char* rhsDescriptor = rhs.cached_descriptor_;
-    int cmp = strcmp(lhsDescriptor, rhsDescriptor);
-    if (cmp != 0) {
-      // Note that the order must be reversed. We want to iterate over the classes in dex files.
-      // They are sorted lexicographically. Thus, the priority-queue must be a min-queue.
-      return cmp > 0;
-    }
-    return dex_file_ < rhs.dex_file_;
-  }
-
-  bool DexFileHasMoreClasses() const {
-    return current_class_index_ + 1 < dex_file_->NumClassDefs();
-  }
-
-  DexFileAndClassPair GetNext() const {
-    return DexFileAndClassPair(dex_file_, current_class_index_ + 1, from_loaded_oat_);
-  }
-
-  size_t GetCurrentClassIndex() const {
-    return current_class_index_;
-  }
-
-  bool FromLoadedOat() const {
-    return from_loaded_oat_;
-  }
-
-  const DexFile* GetDexFile() const {
-    return dex_file_;
-  }
-
-  void DeleteDexFile() {
-    delete dex_file_;
-    dex_file_ = nullptr;
-  }
-
- private:
-  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
-    return dex_file->StringByTypeIdx(class_def.class_idx_);
-  }
-
-  const char* cached_descriptor_;
-  const DexFile* dex_file_;
-  size_t current_class_index_;
-  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
-                          // and what was loaded before. Any old duplicates must have been
-                          // OK, and any new "internal" duplicates are as well (they must
-                          // be from multidex, which resolves correctly).
-};
-
-static void AddDexFilesFromOat(const OatFile* oat_file,
-                               bool already_loaded,
-                               std::priority_queue<DexFileAndClassPair>* heap) {
-  const std::vector<const OatDexFile*>& oat_dex_files = oat_file->GetOatDexFiles();
-  for (const OatDexFile* oat_dex_file : oat_dex_files) {
-    std::string error;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
-    if (dex_file.get() == nullptr) {
-      LOG(WARNING) << "Could not create dex file from oat file: " << error;
-    } else {
-      if (dex_file->NumClassDefs() > 0U) {
-        heap->emplace(dex_file.release(), 0U, already_loaded);
-      }
-    }
-  }
-}
-
-static void AddNext(DexFileAndClassPair* original,
-                    std::priority_queue<DexFileAndClassPair>* heap) {
-  if (original->DexFileHasMoreClasses()) {
-    heap->push(original->GetNext());
-  } else {
-    // Need to delete the dex file.
-    original->DeleteDexFile();
-  }
-}
-
-static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) {
-  while (!heap->empty()) {
-    delete heap->top().GetDexFile();
-    heap->pop();
-  }
-}
-
-const OatFile* ClassLinker::GetBootOatFile() {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image_space == nullptr) {
-    return nullptr;
-  }
-  return image_space->GetOatFile();
-}
-
-const OatFile* ClassLinker::GetPrimaryOatFile() {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  const OatFile* boot_oat_file = GetBootOatFile();
-  if (boot_oat_file != nullptr) {
-    for (const OatFile* oat_file : oat_files_) {
-      if (oat_file != boot_oat_file) {
-        return oat_file;
-      }
-    }
-  }
-  return nullptr;
-}
-
-// Check for class-def collisions in dex files.
-//
-// This works by maintaining a heap with one class from each dex file, sorted by the class
-// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
-// against the following top element. If the descriptor is the same, it is now checked whether
-// the two elements agree on whether their dex file was from an already-loaded oat-file or the
-// new oat file. Any disagreement indicates a collision.
-bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) {
-  if (!kDuplicateClassesCheck) {
-    return false;
-  }
-
-  // Dex files are registered late - once a class is actually being loaded. We have to compare
-  // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses.
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-
-  std::priority_queue<DexFileAndClassPair> queue;
-
-  // Add dex files from already loaded oat files, but skip boot.
-  {
-    const OatFile* boot_oat = GetBootOatFile();
-    for (const OatFile* loaded_oat_file : oat_files_) {
-      if (loaded_oat_file == boot_oat) {
-        continue;
-      }
-      AddDexFilesFromOat(loaded_oat_file, true, &queue);
-    }
-  }
-
-  if (queue.empty()) {
-    // No other oat files, return early.
-    return false;
-  }
-
-  // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, false, &queue);
-
-  // Now drain the queue.
-  while (!queue.empty()) {
-    DexFileAndClassPair compare_pop = queue.top();
-    queue.pop();
-
-    // Compare against the following elements.
-    while (!queue.empty()) {
-      DexFileAndClassPair top = queue.top();
-
-      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
-        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
-        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
-          *error_msg =
-              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
-                           compare_pop.GetCachedDescriptor(),
-                           compare_pop.GetDexFile()->GetLocation().c_str(),
-                           top.GetDexFile()->GetLocation().c_str());
-          FreeDexFilesInHeap(&queue);
-          return true;
-        }
-        // Pop it.
-        queue.pop();
-        AddNext(&top, &queue);
-      } else {
-        // Something else. Done here.
-        break;
-      }
-    }
-    AddNext(&compare_pop, &queue);
-  }
-
-  return false;
-}
-
-std::vector<std::unique_ptr<const DexFile>> ClassLinker::OpenDexFilesFromOat(
-    const char* dex_location, const char* oat_location,
-    std::vector<std::string>* error_msgs) {
-  CHECK(error_msgs != nullptr);
-
-  // Verify we aren't holding the mutator lock, which could starve GC if we
-  // have to generate or relocate an oat file.
-  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
-
-  OatFileAssistant oat_file_assistant(dex_location, oat_location, kRuntimeISA,
-     !Runtime::Current()->IsAotCompiler());
-
-  // Lock the target oat location to avoid races generating and loading the
-  // oat file.
-  std::string error_msg;
-  if (!oat_file_assistant.Lock(&error_msg)) {
-    // Don't worry too much if this fails. If it does fail, it's unlikely we
-    // can generate an oat file anyway.
-    VLOG(class_linker) << "OatFileAssistant::Lock: " << error_msg;
-  }
-
-  // Check if we already have an up-to-date oat file open.
-  const OatFile* source_oat_file = nullptr;
-  {
-    ReaderMutexLock mu(Thread::Current(), dex_lock_);
-    for (const OatFile* oat_file : oat_files_) {
-      CHECK(oat_file != nullptr);
-      if (oat_file_assistant.GivenOatFileIsUpToDate(*oat_file)) {
-        source_oat_file = oat_file;
-        break;
-      }
-    }
-  }
-
-  // If we didn't have an up-to-date oat file open, try to load one from disk.
-  if (source_oat_file == nullptr) {
-    // Update the oat file on disk if we can. This may fail, but that's okay.
-    // Best effort is all that matters here.
-    if (!oat_file_assistant.MakeUpToDate(&error_msg)) {
-      LOG(WARNING) << error_msg;
-    }
-
-    // Get the oat file on disk.
-    std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
-    if (oat_file.get() != nullptr) {
-      // Take the file only if it has no collisions, or we must take it because of preopting.
-      bool accept_oat_file = !HasCollisions(oat_file.get(), &error_msg);
-      if (!accept_oat_file) {
-        // Failed the collision check. Print warning.
-        if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-          LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
-                       << dex_location;
-        } else {
-          LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
-                          " load classes for " << dex_location;
-        }
-        LOG(WARNING) << error_msg;
-
-        // However, if the app was part of /system and preopted, there is no original dex file
-        // available. In that case grudgingly accept the oat file.
-        if (!DexFile::MaybeDex(dex_location)) {
-          accept_oat_file = true;
-          LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
-                       << "Allow oat file use. This is potentially dangerous.";
-        }
-      }
-
-      if (accept_oat_file) {
-        source_oat_file = oat_file.release();
-        RegisterOatFile(source_oat_file);
-      }
-    }
-  }
-
-  std::vector<std::unique_ptr<const DexFile>> dex_files;
-
-  // Load the dex files from the oat file.
-  if (source_oat_file != nullptr) {
-    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
-    if (dex_files.empty()) {
-      error_msgs->push_back("Failed to open dex files from "
-          + source_oat_file->GetLocation());
-    }
-  }
-
-  // Fall back to running out of the original dex file if we couldn't load any
-  // dex_files from the oat file.
-  if (dex_files.empty()) {
-    if (oat_file_assistant.HasOriginalDexFiles()) {
-      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-        if (!DexFile::Open(dex_location, dex_location, &error_msg, &dex_files)) {
-          LOG(WARNING) << error_msg;
-          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
-        }
-      } else {
-        error_msgs->push_back("Fallback mode disabled, skipping dex files.");
-      }
-    } else {
-      error_msgs->push_back("No original dex files found for dex location "
-          + std::string(dex_location));
-    }
-  }
-  return dex_files;
-}
-
-const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
-  ReaderMutexLock mu(Thread::Current(), dex_lock_);
-  for (size_t i = 0; i < oat_files_.size(); i++) {
-    const OatFile* oat_file = oat_files_[i];
-    DCHECK(oat_file != nullptr);
-    if (oat_file->GetLocation() == oat_location) {
-      return oat_file;
-    }
-  }
-  return nullptr;
-}
-
 static void SanityCheckArtMethod(ArtMethod* m,
                                  mirror::Class* expected_class,
                                  gc::space::ImageSpace* space)
@@ -1169,16 +830,17 @@
   CHECK(space != nullptr);
   image_pointer_size_ = space->GetImageHeader().GetPointerSize();
   dex_cache_image_class_lookup_required_ = true;
-  OatFile& oat_file = GetImageOatFile(space);
-  CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
-  CHECK_EQ(oat_file.GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
-  const char* image_file_location = oat_file.GetOatHeader().
+  const OatFile* oat_file = runtime->GetOatFileManager().RegisterImageOatFile(space);
+  DCHECK(oat_file != nullptr);
+  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
+  CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(), 0U);
+  const char* image_file_location = oat_file->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
   CHECK(image_file_location == nullptr || *image_file_location == 0);
-  quick_resolution_trampoline_ = oat_file.GetOatHeader().GetQuickResolutionTrampoline();
-  quick_imt_conflict_trampoline_ = oat_file.GetOatHeader().GetQuickImtConflictTrampoline();
-  quick_generic_jni_trampoline_ = oat_file.GetOatHeader().GetQuickGenericJniTrampoline();
-  quick_to_interpreter_bridge_trampoline_ = oat_file.GetOatHeader().GetQuickToInterpreterBridge();
+  quick_resolution_trampoline_ = oat_file->GetOatHeader().GetQuickResolutionTrampoline();
+  quick_imt_conflict_trampoline_ = oat_file->GetOatHeader().GetQuickImtConflictTrampoline();
+  quick_generic_jni_trampoline_ = oat_file->GetOatHeader().GetQuickGenericJniTrampoline();
+  quick_to_interpreter_bridge_trampoline_ = oat_file->GetOatHeader().GetQuickToInterpreterBridge();
   StackHandleScope<2> hs(self);
   mirror::Object* dex_caches_object = space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
   Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches(
@@ -1200,20 +862,20 @@
                                                           java_lang_Object->GetObjectSize(),
                                                           VoidFunctor()));
 
-  CHECK_EQ(oat_file.GetOatHeader().GetDexFileCount(),
+  CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(),
            static_cast<uint32_t>(dex_caches->GetLength()));
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     StackHandleScope<1> hs2(self);
     Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
-    const OatFile::OatDexFile* oat_dex_file = oat_file.GetOatDexFile(dex_file_location.c_str(),
-                                                                     nullptr);
-    CHECK(oat_dex_file != nullptr) << oat_file.GetLocation() << " " << dex_file_location;
+    const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
+                                                                      nullptr);
+    CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location;
     std::string error_msg;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
-    if (dex_file.get() == nullptr) {
+    if (dex_file == nullptr) {
       LOG(FATAL) << "Failed to open dex file " << dex_file_location
-                 << " from within oat file " << oat_file.GetLocation()
+                 << " from within oat file " << oat_file->GetLocation()
                  << " error '" << error_msg << "'";
       UNREACHABLE();
     }
@@ -1361,9 +1023,9 @@
   class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
   VisitClassRoots(visitor, flags);
   array_iftable_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  for (GcRoot<mirror::Class>& root : find_array_class_cache_) {
-    root.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal));
-  }
+  // Instead of visiting the find_array_class_cache_ drop it so that it doesn't prevent class
+  // unloading if we are marking roots.
+  DropFindArrayClassCache();
 }
 
 class VisitClassLoaderClassesVisitor : public ClassLoaderVisitor {
@@ -1508,7 +1170,6 @@
   mirror::IntArray::ResetArrayClass();
   mirror::LongArray::ResetArrayClass();
   mirror::ShortArray::ResetArrayClass();
-  STLDeleteElements(&oat_files_);
   Thread* const self = Thread::Current();
   JavaVMExt* const vm = Runtime::Current()->GetJavaVM();
   for (const ClassLoaderData& data : class_loaders_) {
@@ -6075,7 +5736,8 @@
 }
 
 bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) {
-  if (Runtime::Current()->UseJit()) {
+  Runtime* const runtime = Runtime::Current();
+  if (runtime->UseJit()) {
     // JIT can have direct code pointers from any method to any other method.
     return true;
   }
@@ -6097,13 +5759,7 @@
   } else {
     // The method can be called outside its own oat file. Therefore it won't be called using its
     // direct code pointer only if all loaded oat files have been compiled in PIC mode.
-    ReaderMutexLock mu(Thread::Current(), dex_lock_);
-    for (const OatFile* oat_file : oat_files_) {
-      if (!oat_file->IsPic()) {
-        return true;
-      }
-    }
-    return false;
+    return runtime->GetOatFileManager().HaveNonPicOatFile();
   }
 }
 
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7f3e938..76cb0a6 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -325,17 +325,10 @@
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  const OatFile* RegisterOatFile(const OatFile* oat_file)
-      REQUIRES(!dex_lock_);
-
   const std::vector<const DexFile*>& GetBootClassPath() {
     return boot_class_path_;
   }
 
-  // Returns the first non-image oat file in the class path.
-  const OatFile* GetPrimaryOatFile()
-      REQUIRES(!dex_lock_);
-
   void VisitClasses(ClassVisitor* visitor)
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -364,26 +357,6 @@
       REQUIRES(!dex_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Finds or creates the oat file holding dex_location. Then loads and returns
-  // all corresponding dex files (there may be more than one dex file loaded
-  // in the case of multidex).
-  // This may return the original, unquickened dex files if the oat file could
-  // not be generated.
-  //
-  // Returns an empty vector if the dex files could not be loaded. In this
-  // case, there will be at least one error message returned describing why no
-  // dex files could not be loaded. The 'error_msgs' argument must not be
-  // null, regardless of whether there is an error or not.
-  //
-  // This method should not be called with the mutator_lock_ held, because it
-  // could end up starving GC if we need to generate or relocate any oat
-  // files.
-  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
-      const char* dex_location,
-      const char* oat_location,
-      std::vector<std::string>* error_msgs)
-      REQUIRES(!dex_lock_, !Locks::mutator_lock_);
-
   // Allocate an instance of a java.lang.Object.
   mirror::Object* AllocObject(Thread* self)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -581,10 +554,6 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  OatFile& GetImageOatFile(gc::space::ImageSpace* space)
-      REQUIRES(!dex_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   void FinishInit(Thread* self)
   SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
@@ -758,12 +727,6 @@
     return dex_caches_;
   }
 
-  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
-      REQUIRES(!dex_lock_);
-
-  // Returns the boot image oat file.
-  const OatFile* GetBootOatFile() SHARED_REQUIRES(dex_lock_);
-
   void CreateProxyConstructor(Handle<mirror::Class> klass, ArtMethod* out)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateProxyMethod(Handle<mirror::Class> klass, ArtMethod* prototype, ArtMethod* out)
@@ -813,9 +776,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  // Check for duplicate class definitions of the given oat file against all open oat files.
-  bool HasCollisions(const OatFile* oat_file, std::string* error_msg) REQUIRES(!dex_lock_);
-
   bool HasInitWithString(Thread* self, const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
 
@@ -834,7 +794,6 @@
   // JNI weak globals to allow dex caches to get unloaded. We lazily delete weak globals when we
   // register new dex files.
   std::list<jweak> dex_caches_ GUARDED_BY(dex_lock_);
-  std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
   // InsertClassTableForClassLoader.
@@ -880,8 +839,8 @@
   // Image pointer size.
   size_t image_pointer_size_;
 
+  friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
-  friend class ImageDumper;  // for FindOpenedOatFileFromOatLocation
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b19381d..a4f95b6 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -612,7 +612,7 @@
         // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
         // This prevents us from having any pending deoptimization request when the debugger attaches
         // to us again while no event has been requested yet.
-        MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+        MutexLock mu(self, *Locks::deoptimization_lock_);
         deoptimization_requests_.clear();
         full_deoptimization_event_count_ = 0U;
       }
@@ -5043,4 +5043,13 @@
   method_ = soa.EncodeMethod(m);
 }
 
+void Dbg::VisitRoots(RootVisitor* visitor) {
+  // Visit breakpoint roots, used to prevent unloading of methods with breakpoints.
+  ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+  BufferedRootVisitor<128> root_visitor(visitor, RootInfo(kRootVMInternal));
+  for (Breakpoint& breakpoint : gBreakpoints) {
+    breakpoint.Method()->VisitRoots(root_visitor, sizeof(void*));
+  }
+}
+
 }  // namespace art
diff --git a/runtime/debugger.h b/runtime/debugger.h
index b3617e4..e908304 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -646,6 +646,7 @@
   static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Visit breakpoint roots, used to prevent unloading of methods with breakpoints.
   static void VisitRoots(RootVisitor* visitor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
deleted file mode 100644
index 72c2e0a..0000000
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method-inl.h"
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "interpreter/interpreter.h"
-#include "mirror/object-inl.h"
-#include "reflection.h"
-#include "runtime.h"
-#include "stack.h"
-
-namespace art {
-
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                   ShadowFrame* shadow_frame, JValue* result) {
-  ArtMethod* method = shadow_frame->GetMethod();
-  // Ensure static methods are initialized.
-  if (method->IsStatic()) {
-    mirror::Class* declaringClass = method->GetDeclaringClass();
-    if (UNLIKELY(!declaringClass->IsInitialized())) {
-      self->PushShadowFrame(shadow_frame);
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
-      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true,
-                                                                            true))) {
-        self->PopShadowFrame();
-        DCHECK(self->IsExceptionPending());
-        return;
-      }
-      self->PopShadowFrame();
-      CHECK(h_class->IsInitializing());
-      // Reload from shadow frame in case the method moved, this is faster than adding a handle.
-      method = shadow_frame->GetMethod();
-    }
-  }
-  uint16_t arg_offset = (code_item == nullptr) ? 0 : code_item->registers_size_ - code_item->ins_size_;
-  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
-                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
-}
-
-}  // namespace art
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.h b/runtime/entrypoints/interpreter/interpreter_entrypoints.h
deleted file mode 100644
index 0952214..0000000
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
-#define ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
-
-#include "base/macros.h"
-#include "dex_file.h"
-#include "offsets.h"
-
-#define INTERPRETER_ENTRYPOINT_OFFSET(ptr_size, x) \
-    Thread::InterpreterEntryPointOffset<ptr_size>(OFFSETOF_MEMBER(InterpreterEntryPoints, x))
-
-namespace art {
-
-union JValue;
-class ShadowFrame;
-class Thread;
-
-// Pointers to functions that are called by interpreter trampolines via thread-local storage.
-struct PACKED(4) InterpreterEntryPoints {
-  void (*pInterpreterToInterpreterBridge)(Thread* self, const DexFile::CodeItem* code_item,
-                                          ShadowFrame* shadow_frame, JValue* result);
-  void (*pInterpreterToCompiledCodeBridge)(Thread* self, const DexFile::CodeItem* code_item,
-                                           ShadowFrame* shadow_frame, JValue* result);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ENTRYPOINTS_INTERPRETER_INTERPRETER_ENTRYPOINTS_H_
diff --git a/runtime/entrypoints/jni/jni_entrypoints.h b/runtime/entrypoints/jni/jni_entrypoints.h
index 6fb0560..9c1b0dc 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.h
+++ b/runtime/entrypoints/jni/jni_entrypoints.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_JNI_JNI_ENTRYPOINTS_H_
 #define ART_RUNTIME_ENTRYPOINTS_JNI_JNI_ENTRYPOINTS_H_
 
+#include "jni.h"
+
 #include "base/macros.h"
 #include "offsets.h"
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5d3ac73..c5492f1 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -719,7 +719,7 @@
     uint16_t num_regs = code_item->registers_size_;
     // No last shadow coming from quick.
     ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-        CREATE_SHADOW_FRAME(num_regs, nullptr, method, 0);
+        CREATE_SHADOW_FRAME(num_regs, /* link */ nullptr, method, /* dex pc */ 0);
     ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
     size_t first_arg_reg = code_item->registers_size_ - code_item->ins_size_;
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c37d159..78f56ee 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -114,7 +114,7 @@
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
                         sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, interpreter_entrypoints,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
                         sizeof(void*) * 6);
 
     // Skip across the entrypoints structures.
@@ -137,15 +137,6 @@
                        thread_tlsptr_end);
   }
 
-  void CheckInterpreterEntryPoints() {
-    CHECKED(OFFSETOF_MEMBER(InterpreterEntryPoints, pInterpreterToInterpreterBridge) == 0,
-            InterpreterEntryPoints_start_with_i2i);
-    EXPECT_OFFSET_DIFFNP(InterpreterEntryPoints, pInterpreterToInterpreterBridge,
-                         pInterpreterToCompiledCodeBridge, sizeof(void*));
-    CHECKED(OFFSETOF_MEMBER(InterpreterEntryPoints, pInterpreterToCompiledCodeBridge)
-            + sizeof(void*) == sizeof(InterpreterEntryPoints), InterpreterEntryPoints_all);
-  }
-
   void CheckJniEntryPoints() {
     CHECKED(OFFSETOF_MEMBER(JniEntryPoints, pDlsymLookup) == 0,
             JniEntryPoints_start_with_dlsymlookup);
@@ -321,10 +312,6 @@
   CheckThreadOffsets();
 }
 
-TEST_F(EntrypointsOrderTest, InterpreterEntryPoints) {
-  CheckInterpreterEntryPoints();
-}
-
 TEST_F(EntrypointsOrderTest, JniEntryPoints) {
   CheckJniEntryPoints();
 }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 0a7a69f..d2d12af 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "base/stl_util.h"
+#include "debugger.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
@@ -385,6 +386,10 @@
     TimingLogger::ScopedTiming split5("VisitNonThreadRoots", GetTimings());
     Runtime::Current()->VisitNonThreadRoots(this);
   }
+  {
+    TimingLogger::ScopedTiming split6("Dbg::VisitRoots", GetTimings());
+    Dbg::VisitRoots(this);
+  }
   Runtime::Current()->GetHeap()->VisitAllocationRecords(this);
 
   // Immune spaces.
@@ -401,7 +406,7 @@
 
   Thread* self = Thread::Current();
   {
-    TimingLogger::ScopedTiming split6("ProcessMarkStack", GetTimings());
+    TimingLogger::ScopedTiming split7("ProcessMarkStack", GetTimings());
     // We transition through three mark stack modes (thread-local, shared, GC-exclusive). The
     // primary reasons are the fact that we need to use a checkpoint to process thread-local mark
     // stacks, but after we disable weak refs accesses, we can't use a checkpoint due to a deadlock
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1923d24..ce64b10 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -789,10 +789,13 @@
 
   CHECK(image_header.GetOatDataBegin() != nullptr);
 
-  OatFile* oat_file = OatFile::Open(oat_filename, oat_filename, image_header.GetOatDataBegin(),
+  OatFile* oat_file = OatFile::Open(oat_filename,
+                                    oat_filename,
+                                    image_header.GetOatDataBegin(),
                                     image_header.GetOatFileBegin(),
                                     !Runtime::Current()->IsAotCompiler(),
-                                    nullptr, error_msg);
+                                    nullptr,
+                                    error_msg);
   if (oat_file == nullptr) {
     *error_msg = StringPrintf("Failed to open oat file '%s' referenced from image %s: %s",
                               oat_filename.c_str(), GetName(), error_msg->c_str());
@@ -839,15 +842,13 @@
   return true;
 }
 
-
 const OatFile* ImageSpace::GetOatFile() const {
   return oat_file_non_owned_;
 }
 
-
-OatFile* ImageSpace::ReleaseOatFile() {
-  CHECK(oat_file_.get() != nullptr);
-  return oat_file_.release();
+std::unique_ptr<const OatFile> ImageSpace::ReleaseOatFile() {
+  CHECK(oat_file_ != nullptr);
+  return std::move(oat_file_);
 }
 
 void ImageSpace::Dump(std::ostream& os) const {
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 215c18b..9920742 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -62,9 +62,8 @@
   const OatFile* GetOatFile() const;
 
   // Releases the OatFile from the ImageSpace so it can be transfer to
-  // the caller, presumably the ClassLinker.
-  OatFile* ReleaseOatFile()
-      SHARED_REQUIRES(Locks::mutator_lock_);
+  // the caller, presumably the OatFileManager.
+  std::unique_ptr<const OatFile> ReleaseOatFile();
 
   void VerifyImageAllocations()
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index c9ba6cf..a5b63b4 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -17,6 +17,7 @@
 #include "indirect_reference_table-inl.h"
 
 #include "jni_internal.h"
+#include "nth_caller_visitor.h"
 #include "reference_table.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index f783b04..b010504 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -14,10 +14,11 @@
  * limitations under the License.
  */
 
-#include "interpreter_common.h"
+#include "interpreter.h"
 
 #include <limits>
 
+#include "interpreter_common.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -332,7 +333,7 @@
   // Set up shadow frame with matching number of reference slots to vregs.
   ShadowFrame* last_shadow_frame = self->GetManagedStack()->GetTopShadowFrame();
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, 0);
+      CREATE_SHADOW_FRAME(num_regs, last_shadow_frame, method, /* dex pc */ 0);
   ShadowFrame* shadow_frame = shadow_frame_unique_ptr.get();
   self->PushShadowFrame(shadow_frame);
 
@@ -448,8 +449,8 @@
   return Execute(self, code_item, *shadow_frame, JValue());
 }
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                  ShadowFrame* shadow_frame, JValue* result) {
+void ArtInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
+                                       ShadowFrame* shadow_frame, JValue* result) {
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 61140a2..b21ea84 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -45,17 +45,12 @@
                                              ShadowFrame* shadow_frame)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
+void ArtInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
+                                       ShadowFrame* shadow_frame, JValue* result)
+    SHARED_REQUIRES(Locks::mutator_lock_);
 
 }  // namespace interpreter
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                  ShadowFrame* shadow_frame, JValue* result)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                                   ShadowFrame* shadow_frame, JValue* result)
-    SHARED_REQUIRES(Locks::mutator_lock_);
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_H_
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index ad34c9a..44eb29e 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -516,6 +516,39 @@
         Dbg::IsForcedInterpreterNeededForCalling(self, target);
 }
 
+static void ArtInterpreterToCompiledCodeBridge(Thread* self,
+                                               const DexFile::CodeItem* code_item,
+                                               ShadowFrame* shadow_frame,
+                                               JValue* result)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  ArtMethod* method = shadow_frame->GetMethod();
+  // Ensure static methods are initialized.
+  if (method->IsStatic()) {
+    mirror::Class* declaringClass = method->GetDeclaringClass();
+    if (UNLIKELY(!declaringClass->IsInitialized())) {
+      self->PushShadowFrame(shadow_frame);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(declaringClass));
+      if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true,
+                                                                            true))) {
+        self->PopShadowFrame();
+        DCHECK(self->IsExceptionPending());
+        return;
+      }
+      self->PopShadowFrame();
+      CHECK(h_class->IsInitializing());
+      // Reload from shadow frame in case the method moved; this is faster than adding a handle.
+      method = shadow_frame->GetMethod();
+    }
+  }
+  uint16_t arg_offset = (code_item == nullptr)
+                            ? 0
+                            : code_item->registers_size_ - code_item->ins_size_;
+  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
+                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
+                 result, method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty());
+}
+
 template <bool is_range,
           bool do_assignability_check,
           size_t kVarArgMax>
@@ -586,7 +619,7 @@
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
   ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
-      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, 0);
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
   ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
 
   // Initialize new shadow frame by copying the registers from the callee shadow frame.
@@ -690,9 +723,9 @@
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
     if (NeedsInterpreter(self, new_shadow_frame)) {
-      artInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
     } else {
-      artInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
+      ArtInterpreterToCompiledCodeBridge(self, code_item, new_shadow_frame, result);
     }
   } else {
     UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index f57bddb..a5a8d81 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -45,6 +45,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "stack.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -79,12 +80,20 @@
 void ThrowNullPointerExceptionFromInterpreter()
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-static inline void DoMonitorEnter(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
+template <bool kMonitorCounting>
+static inline void DoMonitorEnter(Thread* self,
+                                  ShadowFrame* frame,
+                                  Object* ref) NO_THREAD_SAFETY_ANALYSIS {
   ref->MonitorEnter(self);
+  frame->GetLockCountData().AddMonitor<kMonitorCounting>(self, ref);
 }
 
-static inline void DoMonitorExit(Thread* self, Object* ref) NO_THREAD_SAFETY_ANALYSIS {
+template <bool kMonitorCounting>
+static inline void DoMonitorExit(Thread* self,
+                                 ShadowFrame* frame,
+                                 Object* ref) NO_THREAD_SAFETY_ANALYSIS {
   ref->MonitorExit(self);
+  frame->GetLockCountData().RemoveMonitorOrThrow<kMonitorCounting>(self, ref);
 }
 
 void AbortTransactionF(Thread* self, const char* fmt, ...)
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 9677d79..4265b50 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -89,6 +89,11 @@
       UnexpectedOpcode(inst, shadow_frame);                                                       \
   } HANDLE_INSTRUCTION_END();
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
 
 /**
  * Interpreter based on computed goto tables.
@@ -275,6 +280,7 @@
   HANDLE_INSTRUCTION_START(RETURN_VOID_NO_BARRIER) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -289,6 +295,7 @@
     QuasiAtomic::ThreadFenceForConstructor();
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -304,6 +311,7 @@
     result.SetJ(0);
     result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -318,6 +326,7 @@
     JValue result;
     result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
     if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
       instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
@@ -331,6 +340,7 @@
   HANDLE_INSTRUCTION_START(RETURN_OBJECT) {
     JValue result;
     self->AllowThreadSuspension();
+    HANDLE_MONITOR_CHECKS();
     const uint8_t vreg_index = inst->VRegA_11x(inst_data);
     Object* obj_result = shadow_frame.GetVRegReference(vreg_index);
     if (do_assignability_check && obj_result != nullptr) {
@@ -468,7 +478,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorEnter(self, obj);
+      DoMonitorEnter<do_assignability_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -480,7 +490,7 @@
       ThrowNullPointerExceptionFromInterpreter();
       HANDLE_PENDING_EXCEPTION();
     } else {
-      DoMonitorExit(self, obj);
+      DoMonitorExit<do_assignability_check>(self, &shadow_frame, obj);
       POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), 1);
     }
   }
@@ -2544,6 +2554,8 @@
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame, dex_pc,
                                                                   instrumentation);
     if (found_dex_pc == DexFile::kDexNoIndex) {
+      // Structured locking is to be enforced for abnormal termination, too.
+      shadow_frame.GetLockCountData().CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);
       return JValue(); /* Handled in caller. */
     } else {
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 083dfb5..76d4bb0fc 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -31,6 +31,9 @@
                                                                   inst->GetDexPc(insns),        \
                                                                   instrumentation);             \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
+      /* Structured locking is to be enforced for abnormal termination, too. */                 \
+      shadow_frame.GetLockCountData().                                                          \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self);                        \
       return JValue(); /* Handled in caller. */                                                 \
     } else {                                                                                    \
       int32_t displacement = static_cast<int32_t>(found_dex_pc) - static_cast<int32_t>(dex_pc); \
@@ -47,6 +50,12 @@
     }                                                                             \
   } while (false)
 
+#define HANDLE_MONITOR_CHECKS()                                                                   \
+  if (!shadow_frame.GetLockCountData().                                                           \
+          CheckAllMonitorsReleasedOrThrow<do_assignability_check>(self)) {                        \
+    HANDLE_PENDING_EXCEPTION();                                                                   \
+  }
+
 // Code to run before each dex instruction.
 #define PREAMBLE()                                                                              \
   do {                                                                                          \
@@ -182,6 +191,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -194,6 +204,7 @@
         QuasiAtomic::ThreadFenceForConstructor();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -207,6 +218,7 @@
         result.SetJ(0);
         result.SetI(shadow_frame.GetVReg(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -219,6 +231,7 @@
         JValue result;
         result.SetJ(shadow_frame.GetVRegLong(inst->VRegA_11x(inst_data)));
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         if (UNLIKELY(instrumentation->HasMethodExitListeners())) {
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
@@ -230,6 +243,7 @@
         PREAMBLE();
         JValue result;
         self->AllowThreadSuspension();
+        HANDLE_MONITOR_CHECKS();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
         Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
@@ -366,7 +380,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorEnter(self, obj);
+          DoMonitorEnter<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
@@ -378,7 +392,7 @@
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
         } else {
-          DoMonitorExit(self, obj);
+          DoMonitorExit<do_assignability_check>(self, &shadow_frame, obj);
           POSSIBLY_HANDLE_PENDING_EXCEPTION(self->IsExceptionPending(), Next_1xx);
         }
         break;
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index c559389..92b6e4f 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1099,7 +1099,7 @@
     (*iter->second)(self, shadow_frame, result, arg_offset);
   } else {
     // Not special, continue with regular interpreter execution.
-    artInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
+    ArtInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
   }
 }
 
diff --git a/runtime/leb128.h b/runtime/leb128.h
index baf9da2..74934ae 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -127,8 +127,9 @@
   return dest;
 }
 
-template<typename Allocator>
-static inline void EncodeUnsignedLeb128(std::vector<uint8_t, Allocator>* dest, uint32_t value) {
+template <typename Vector>
+static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   uint8_t out = value & 0x7f;
   value >>= 7;
   while (value != 0) {
@@ -165,8 +166,9 @@
   return dest;
 }
 
-template<typename Allocator>
-static inline void EncodeSignedLeb128(std::vector<uint8_t, Allocator>* dest, int32_t value) {
+template<typename Vector>
+static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
   uint8_t out = value & 0x7f;
   while (extra_bits != 0u) {
@@ -179,10 +181,12 @@
 }
 
 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
-template <typename Allocator = std::allocator<uint8_t>>
+template <typename Vector = std::vector<uint8_t>>
 class Leb128Encoder {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+
  public:
-  explicit Leb128Encoder(std::vector<uint8_t, Allocator>* data) : data_(data) {
+  explicit Leb128Encoder(Vector* data) : data_(data) {
     DCHECK(data != nullptr);
   }
 
@@ -212,27 +216,29 @@
     }
   }
 
-  const std::vector<uint8_t, Allocator>& GetData() const {
+  const Vector& GetData() const {
     return *data_;
   }
 
  protected:
-  std::vector<uint8_t, Allocator>* const data_;
+  Vector* const data_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
 };
 
 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
-template <typename Allocator = std::allocator<uint8_t>>
-class Leb128EncodingVector FINAL : private std::vector<uint8_t, Allocator>,
-                                   public Leb128Encoder<Allocator> {
- public:
-  Leb128EncodingVector() : Leb128Encoder<Allocator>(this) { }
+template <typename Vector = std::vector<uint8_t>>
+class Leb128EncodingVector FINAL : private Vector,
+                                   public Leb128Encoder<Vector> {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
-  explicit Leb128EncodingVector(const Allocator& alloc)
-    : std::vector<uint8_t, Allocator>(alloc),
-      Leb128Encoder<Allocator>(this) { }
+ public:
+  Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
+
+  explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
+    : Vector(alloc),
+      Leb128Encoder<Vector>(this) { }
 
  private:
   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 4aebc2c..4850b6f 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -27,6 +27,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "oat_file_assistant.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "profiler.h"
 #include "runtime.h"
@@ -160,11 +161,14 @@
     return 0;
   }
 
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* linker = runtime->GetClassLinker();
   std::vector<std::unique_ptr<const DexFile>> dex_files;
   std::vector<std::string> error_msgs;
 
-  dex_files = linker->OpenDexFilesFromOat(sourceName.c_str(), outputName.c_str(), &error_msgs);
+  dex_files = runtime->GetOatFileManager().OpenDexFilesFromOat(sourceName.c_str(),
+                                                               outputName.c_str(),
+                                                               &error_msgs);
 
   if (!dex_files.empty()) {
     jlongArray array = ConvertNativeToJavaArray(env, dex_files);
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 5da15df..3a73900 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -522,6 +522,10 @@
   }
   if (classes == nullptr) {
     // Return an empty array instead of a null pointer.
+    if (soa.Self()->IsExceptionPending()) {
+      // Pending exception from GetDeclaredClasses.
+      return nullptr;
+    }
     mirror::Class* class_class = mirror::Class::GetJavaLangClass();
     mirror::Class* class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index e1e9ceb..45b9484 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -103,10 +103,17 @@
     // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
     // access checks anyways. TODO: Investigate if this the correct behavior.
     if (caller != nullptr && !caller->CanAccess(c.Get())) {
-      soa.Self()->ThrowNewExceptionF(
-          "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
-          PrettyClass(c.Get()).c_str(), PrettyClass(caller).c_str());
-      return nullptr;
+      if (PrettyDescriptor(c.Get()) == "dalvik.system.DexPathList$Element") {
+        // b/20699073.
+        LOG(WARNING) << "The dalvik.system.DexPathList$Element constructor is not accessible by "
+                        "default. This is a temporary workaround for backwards compatibility "
+                        "with class-loader hacks. Please update your application.";
+      } else {
+        soa.Self()->ThrowNewExceptionF(
+            "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
+            PrettyClass(c.Get()).c_str(), PrettyClass(caller).c_str());
+        return nullptr;
+      }
     }
   }
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(soa.Self(), c, true, true)) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 24acbc8..2aa5783 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '7', '1', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '7', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index a4a159e..80f017d 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -42,6 +42,7 @@
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "runtime.h"
 #include "utils.h"
@@ -115,7 +116,19 @@
   // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
   //       !executable is a sign that we may want to patch), which may not be allowed for
   //       various reasons.
-  if (kUseDlopen && (kIsTargetBuild || kUseDlopenOnHost) && executable) {
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  Runtime* const runtime = Runtime::Current();
+  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
+  if (kUseDlopen &&
+      (kIsTargetBuild ||
+          (kUseDlopenOnHost &&
+           // Manager may be null if we are running without a runtime.
+           manager != nullptr &&
+           manager->FindOpenedOatFileFromOatLocation(location) == nullptr)) &&
+      executable) {
     // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
     // this will register the oat file with the linker and allows libunwind to find our info.
     ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 20347a9..de4e8ec 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -31,6 +31,7 @@
 #include "compiler_callbacks.h"
 #include "gc/space/image_space.h"
 #include "mem_map.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
@@ -958,10 +959,12 @@
 
     // Load the dex files, and save a pointer to the loaded oat file, so that
     // we can verify only one oat file was loaded for the dex location.
-    ClassLinker* linker = Runtime::Current()->GetClassLinker();
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     std::vector<std::string> error_msgs;
-    dex_files = linker->OpenDexFilesFromOat(dex_location_.c_str(), oat_location_.c_str(), &error_msgs);
+    dex_files = Runtime::Current()->GetOatFileManager().OpenDexFilesFromOat(
+        dex_location_.c_str(),
+        oat_location_.c_str(),
+        &error_msgs);
     CHECK(!dex_files.empty()) << Join(error_msgs, '\n');
     CHECK(dex_files[0]->GetOatDexFile() != nullptr) << dex_files[0]->GetLocation();
     loaded_oat_file_ = dex_files[0]->GetOatDexFile()->GetOatFile();
@@ -980,8 +983,9 @@
 // Test the case where multiple processes race to generate an oat file.
 // This simulates multiple processes using multiple threads.
 //
-// We want only one Oat file to be loaded when there is a race to load, to
-// avoid using up the virtual memory address space.
+// We want unique Oat files to be loaded even when there is a race to load.
+// TODO: The test case no longer tests locking the way it was intended since we now get multiple
+// copies of the same Oat files mapped at different locations.
 TEST_F(OatFileAssistantTest, RaceToGenerate) {
   std::string dex_location = GetScratchDir() + "/RaceToGenerate.jar";
   std::string oat_location = GetOdexDir() + "/RaceToGenerate.oat";
@@ -1002,10 +1006,12 @@
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
 
-  // Verify every task got the same pointer.
-  const OatFile* expected = tasks[0]->GetLoadedOatFile();
+  // Verify every task got a unique oat file.
+  std::set<const OatFile*> oat_files;
   for (auto& task : tasks) {
-    EXPECT_EQ(expected, task->GetLoadedOatFile());
+    const OatFile* oat_file = task->GetLoadedOatFile();
+    EXPECT_TRUE(oat_files.find(oat_file) == oat_files.end());
+    oat_files.insert(oat_file);
   }
 }
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
new file mode 100644
index 0000000..73b065f
--- /dev/null
+++ b/runtime/oat_file_manager.cc
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oat_file_manager.h"
+
+#include <memory>
+#include <queue>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "dex_file.h"
+#include "gc/space/image_space.h"
+#include "oat_file_assistant.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// For b/21333911.
+static constexpr bool kDuplicateClassesCheck = false;
+
+const OatFile* OatFileManager::RegisterOatFile(std::unique_ptr<const OatFile> oat_file) {
+  // Exclusive (writer) access: oat_files_ and have_non_pic_oat_file_ are modified below.
+  WriterMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  DCHECK(oat_file != nullptr);
+  if (kIsDebugBuild) {
+    for (const std::unique_ptr<const OatFile>& existing : oat_files_) {
+      CHECK_NE(oat_file.get(), existing.get()) << oat_file->GetLocation();
+      // Copies of the same oat file must be loaded at different addresses.
+      CHECK_NE(oat_file->Begin(), existing->Begin()) << "Oat file already mapped at that location";
+    }
+  }
+  have_non_pic_oat_file_ = have_non_pic_oat_file_ || !oat_file->IsPic();
+  oat_files_.push_back(std::move(oat_file));
+  return oat_files_.back().get();
+}
+
+// Returns the first registered oat file whose location equals |oat_location|, or null if none.
+const OatFile* OatFileManager::FindOpenedOatFileFromOatLocation(const std::string& oat_location)
+    const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  const OatFile* match = nullptr;
+  for (auto it = oat_files_.begin(); match == nullptr && it != oat_files_.end(); ++it) {
+    match = ((*it)->GetLocation() == oat_location) ? it->get() : nullptr;
+  }
+  return match;
+}
+
+// Returns the oat file attached to the heap's image space. When the heap has no image space
+// there is no boot oat file, and null is returned.
+const OatFile* OatFileManager::GetBootOatFile() const {
+  gc::space::ImageSpace* const boot_space = Runtime::Current()->GetHeap()->GetImageSpace();
+  const bool has_boot_image = (boot_space != nullptr);
+  return has_boot_image ? boot_space->GetOatFile() : nullptr;
+}
+
+// Returns the first registered non-boot oat file, or null (also when there is no boot oat file).
+const OatFile* OatFileManager::GetPrimaryOatFile() const {
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+  const OatFile* const boot = GetBootOatFile();
+  if (boot == nullptr) {
+    return nullptr;
+  }
+  for (const std::unique_ptr<const OatFile>& candidate : oat_files_) {
+    if (candidate.get() != boot) { return candidate.get(); }
+  }
+  return nullptr;
+}
+
+// Intentionally empty: oat_files_ holds std::unique_ptrs, which release the oat files.
+OatFileManager::~OatFileManager() {}
+
+const OatFile* OatFileManager::RegisterImageOatFile(gc::space::ImageSpace* space) {
+  return RegisterOatFile(space->ReleaseOatFile());  // The space gives up its oat file to us.
+}
+
+// Cursor over the class definitions of a single dex file, used as the element type of the
+// priority queue in the duplicate-class check. Takes ownership of the dex file and caches the
+// descriptor of the class it currently points at.
+class DexFileAndClassPair : ValueObject {
+ public:
+  // Takes ownership of |dex_file|. |current_class_index| must fit in 16 bits (DCHECKed).
+  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
+     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+       dex_file_(dex_file),
+       current_class_index_(current_class_index),
+       from_loaded_oat_(from_loaded_oat) {}
+
+  DexFileAndClassPair(DexFileAndClassPair&& rhs) { *this = std::move(rhs); }
+
+  // After the move |rhs| no longer owns a dex file; its other fields are left untouched.
+  DexFileAndClassPair& operator=(DexFileAndClassPair&& rhs) {
+    cached_descriptor_ = rhs.cached_descriptor_;
+    dex_file_ = std::move(rhs.dex_file_);
+    current_class_index_ = rhs.current_class_index_;
+    from_loaded_oat_ = rhs.from_loaded_oat_;
+    return *this;
+  }
+
+  const char* GetCachedDescriptor() const { return cached_descriptor_; }
+
+  bool operator<(const DexFileAndClassPair& rhs) const {
+    const int cmp = strcmp(cached_descriptor_, rhs.cached_descriptor_);
+    if (cmp != 0) {
+      // Note that the order must be reversed. We want to iterate over the classes in dex files.
+      // They are sorted lexicographically. Thus, the priority-queue must be a min-queue.
+      return cmp > 0;
+    }
+    return dex_file_ < rhs.dex_file_;
+  }
+
+  bool DexFileHasMoreClasses() const {
+    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+  }
+
+  // Advances to the next class definition; only valid while DexFileHasMoreClasses() is true.
+  void Next() {
+    ++current_class_index_;
+    // Keep the cache in sync with the index: the descriptor is what operator< and the
+    // collision check compare, so a stale value would keep reporting the previous class.
+    cached_descriptor_ = GetClassDescriptor(dex_file_.get(), current_class_index_);
+  }
+
+  size_t GetCurrentClassIndex() const { return current_class_index_; }
+
+  bool FromLoadedOat() const { return from_loaded_oat_; }
+
+  const DexFile* GetDexFile() const { return dex_file_.get(); }
+
+ private:
+  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
+    DCHECK(IsUint<16>(index));
+    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
+    return dex_file->StringByTypeIdx(class_def.class_idx_);
+  }
+
+  // Descriptor of the class at current_class_index_ in dex_file_.
+  const char* cached_descriptor_;
+  std::unique_ptr<const DexFile> dex_file_;
+  size_t current_class_index_;
+  bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
+                          // and what was loaded before. Any old duplicates must have been
+                          // OK, and any new "internal" duplicates are as well (they must
+                          // be from multidex, which resolves correctly).
+};
+
+static void AddDexFilesFromOat(const OatFile* oat_file,
+                               bool already_loaded,  // Becomes each pair's from_loaded_oat.
+                               /*out*/std::priority_queue<DexFileAndClassPair>* heap) {
+  for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
+    std::string error;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
+    if (dex_file == nullptr) {
+      LOG(WARNING) << "Could not create dex file from oat file: " << error;  // Non-fatal.
+    } else if (dex_file->NumClassDefs() > 0U) {  // Dex files without classes are not queued.
+      heap->emplace(dex_file.release(), /*current_class_index*/0U, already_loaded);
+    }
+  }
+}
+
+// Advances |original| to its dex file's next class and re-queues it; no-op on the last class.
+static void AddNext(/*inout*/DexFileAndClassPair* original,
+                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
+  if (!original->DexFileHasMoreClasses()) { return; }
+  original->Next();
+  heap->push(std::move(*original));  // |original| is moved-from afterwards.
+}
+
+// Check for class-def collisions in dex files. Returns true and sets |error_msg| on a collision.
+//
+// This works by maintaining a heap with one class from each dex file, sorted by the class
+// descriptor. Then a dex-file/class pair is continually removed from the heap and compared
+// against the following top element. If the descriptor is the same, it is now checked whether
+// the two elements agree on whether their dex file was from an already-loaded oat-file or the
+// new oat file. Any disagreement indicates a collision.
+bool OatFileManager::HasCollisions(const OatFile* oat_file,
+                                   std::string* error_msg /*out*/) const {
+  DCHECK(oat_file != nullptr);
+  DCHECK(error_msg != nullptr);
+  if (!kDuplicateClassesCheck) {
+    return false;
+  }
+
+  // Dex files are registered late - once a class is actually being loaded. We have to compare
+  // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses.
+  ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_);
+
+  std::priority_queue<DexFileAndClassPair> queue;
+
+  // Add dex files from already loaded oat files, but skip boot.
+  const OatFile* boot_oat = GetBootOatFile();
+  for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) {
+    if (loaded_oat_file.get() != boot_oat) {
+      AddDexFilesFromOat(loaded_oat_file.get(), /*already_loaded*/true, &queue);
+    }
+  }
+
+  if (queue.empty()) {
+    // No other oat files, return early.
+    return false;
+  }
+
+  // Add dex files from the oat file to check.
+  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue);
+
+  // Now drain the queue.
+  while (!queue.empty()) {
+    // Modifying the top element is only safe if we pop right after.
+    DexFileAndClassPair compare_pop(std::move(const_cast<DexFileAndClassPair&>(queue.top())));
+    queue.pop();
+
+    // Compare against the following elements. Only peek until we know we pop the element.
+    while (!queue.empty()) {
+      const DexFileAndClassPair& peek = queue.top();
+      if (strcmp(compare_pop.GetCachedDescriptor(), peek.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != peek.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           peek.GetDexFile()->GetLocation().c_str());
+          return true;
+        }
+        // Pop it. Moving out of top() is safe here because the pop follows immediately.
+        DexFileAndClassPair top(std::move(const_cast<DexFileAndClassPair&>(queue.top())));
+        queue.pop();
+        AddNext(&top, &queue);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(&compare_pop, &queue);
+  }
+
+  return false;
+}
+
+std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
+    const char* dex_location,
+    const char* oat_location,
+    std::vector<std::string>* error_msgs) {
+  CHECK(dex_location != nullptr);
+  CHECK(error_msgs != nullptr);
+
+  // Verify we aren't holding the mutator lock, which could starve GC if we
+  // have to generate or relocate an oat file.
+  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
+
+  OatFileAssistant oat_file_assistant(dex_location,
+                                      oat_location,
+                                      kRuntimeISA,
+                                      !Runtime::Current()->IsAotCompiler());
+
+  // Lock the target oat location to avoid races generating and loading the
+  // oat file.
+  std::string error_msg;
+  if (!oat_file_assistant.Lock(/*out*/&error_msg)) {
+    // Don't worry too much if this fails. If it does fail, it's unlikely we
+    // can generate an oat file anyway.
+    VLOG(class_linker) << "OatFileAssistant::Lock: " << error_msg;
+  }
+
+  const OatFile* source_oat_file = nullptr;  // Stays null unless an oat file gets registered.
+
+  // Update the oat file on disk if we can. This may fail, but that's okay.
+  // Best effort is all that matters here.
+  if (!oat_file_assistant.MakeUpToDate(/*out*/&error_msg)) {
+    LOG(WARNING) << error_msg;
+  }
+
+  // Get the oat file on disk.
+  std::unique_ptr<const OatFile> oat_file(oat_file_assistant.GetBestOatFile().release());
+  if (oat_file != nullptr) {
+    // Take the file only if it has no collisions, or we must take it because of preopting.
+    bool accept_oat_file = !HasCollisions(oat_file.get(), /*out*/ &error_msg);
+    if (!accept_oat_file) {
+      // Failed the collision check. Print warning.
+      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+        LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
+                     << dex_location;
+      } else {
+        LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
+                        " load classes for " << dex_location;
+      }
+      LOG(WARNING) << error_msg;
+
+      // However, if the app was part of /system and preopted, there is no original dex file
+      // available. In that case grudgingly accept the oat file.
+      if (!DexFile::MaybeDex(dex_location)) {
+        accept_oat_file = true;  // Overrides the failed collision check.
+        LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                     << "Allow oat file use. This is potentially dangerous.";
+      }
+    }
+
+    if (accept_oat_file) {
+      VLOG(class_linker) << "Registering " << oat_file->GetLocation();
+      source_oat_file = RegisterOatFile(std::move(oat_file));  // Manager now owns the oat file.
+    }
+  }
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files;  // Result handed back to the caller.
+
+  // Load the dex files from the oat file.
+  if (source_oat_file != nullptr) {
+    dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+    if (dex_files.empty()) {
+      error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
+    }
+  }
+
+  // Fall back to running out of the original dex file if we couldn't load any
+  // dex_files from the oat file.
+  if (dex_files.empty()) {
+    if (oat_file_assistant.HasOriginalDexFiles()) {
+      if (Runtime::Current()->IsDexFileFallbackEnabled()) {
+        if (!DexFile::Open(dex_location, dex_location, /*out*/ &error_msg, &dex_files)) {
+          LOG(WARNING) << error_msg;
+          error_msgs->push_back("Failed to open dex files from " + std::string(dex_location));
+        }
+      } else {
+        error_msgs->push_back("Fallback mode disabled, skipping dex files.");
+      }
+    } else {
+      error_msgs->push_back("No original dex files found for dex location "
+          + std::string(dex_location));
+    }
+  }
+  return dex_files;
+}
+
+}  // namespace art
diff --git a/runtime/oat_file_manager.h b/runtime/oat_file_manager.h
new file mode 100644
index 0000000..3059cb5
--- /dev/null
+++ b/runtime/oat_file_manager.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OAT_FILE_MANAGER_H_
+#define ART_RUNTIME_OAT_FILE_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+
+namespace art {
+
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
+class DexFile;
+class OatFile;
+
+// Class for dealing with oat file management.
+//
+// This class knows about all the loaded oat files and provides utility functions. The oat file
+// pointers returned from functions are always valid.
+class OatFileManager {
+ public:
+  OatFileManager() : have_non_pic_oat_file_(false) {}
+  ~OatFileManager();
+
+  // Add an oat file to the internal accounting. In debug builds, aborts if the same oat file or
+  // one with the same base address is already registered. Returns the oat file pointer.
+  const OatFile* RegisterOatFile(std::unique_ptr<const OatFile> oat_file)
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Find the first opened oat file with the same location, returns null if there are none.
+  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Returns true if we have a non pic oat file.
+  bool HaveNonPicOatFile() const {
+    return have_non_pic_oat_file_;
+  }
+
+  // Returns the boot image oat file.
+  const OatFile* GetBootOatFile() const;
+
+  // Returns the first non-image oat file in the class path.
+  const OatFile* GetPrimaryOatFile() const REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Return the oat file for an image, registers the oat file. Takes ownership of the imagespace's
+  // underlying oat file.
+  const OatFile* RegisterImageOatFile(gc::space::ImageSpace* space)
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  // Finds or creates the oat file holding dex_location. Then loads and returns
+  // all corresponding dex files (there may be more than one dex file loaded
+  // in the case of multidex).
+  // This may return the original, unquickened dex files if the oat file could
+  // not be generated.
+  //
+  // Returns an empty vector if the dex files could not be loaded. In this
+  // case, there will be at least one error message returned describing why
+  // the dex files could not be loaded. The 'error_msgs' argument must not be
+  // null, regardless of whether there is an error or not.
+  //
+  // This method should not be called with the mutator_lock_ held, because it
+  // could end up starving GC if we need to generate or relocate any oat
+  // files.
+  std::vector<std::unique_ptr<const DexFile>> OpenDexFilesFromOat(
+      const char* dex_location,
+      const char* oat_location,
+      /*out*/std::vector<std::string>* error_msgs)
+      REQUIRES(!Locks::oat_file_manager_lock_, !Locks::mutator_lock_);
+
+ private:
+  // Check for duplicate class definitions of the given oat file against all open oat files.
+  // Return true if there are any class definition collisions in the oat_file.
+  bool HasCollisions(const OatFile* oat_file, /*out*/std::string* error_msg) const
+      REQUIRES(!Locks::oat_file_manager_lock_);
+
+  std::vector<std::unique_ptr<const OatFile>> oat_files_ GUARDED_BY(Locks::oat_file_manager_lock_);
+  bool have_non_pic_oat_file_;  // Set once a registered oat file is not PIC; never cleared.
+  DISALLOW_COPY_AND_ASSIGN(OatFileManager);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_OAT_FILE_MANAGER_H_
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 64c2249..837662d 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -74,6 +74,7 @@
   kIntrinsicUnsafeGet,
   kIntrinsicUnsafePut,
   kIntrinsicSystemArrayCopyCharArray,
+  kIntrinsicSystemArrayCopy,
 
   kInlineOpNop,
   kInlineOpReturnArg,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1f447d0..7a1f0af 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -115,6 +115,7 @@
 #include "native/sun_misc_Unsafe.h"
 #include "native_bridge_art_interface.h"
 #include "oat_file.h"
+#include "oat_file_manager.h"
 #include "os.h"
 #include "parsed_options.h"
 #include "profiler.h"
@@ -281,6 +282,7 @@
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
+  oat_file_manager_.reset();
   delete heap_;
   delete intern_table_;
   delete java_vm_;
@@ -698,7 +700,7 @@
 }
 
 bool Runtime::IsDebuggable() const {
-  const OatFile* oat_file = GetClassLinker()->GetPrimaryOatFile();
+  const OatFile* oat_file = GetOatFileManager().GetPrimaryOatFile();
   return oat_file != nullptr && oat_file->IsDebuggable();
 }
 
@@ -756,9 +758,9 @@
   if (elf_file.get() == nullptr) {
     return false;
   }
-  std::unique_ptr<OatFile> oat_file(OatFile::OpenWithElfFile(elf_file.release(), oat_location,
-                                                             nullptr, &error_msg));
-  if (oat_file.get() == nullptr) {
+  std::unique_ptr<const OatFile> oat_file(
+      OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
+  if (oat_file == nullptr) {
     LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
@@ -775,7 +777,7 @@
       dex_files->push_back(std::move(dex_file));
     }
   }
-  Runtime::Current()->GetClassLinker()->RegisterOatFile(oat_file.release());
+  Runtime::Current()->GetOatFileManager().RegisterOatFile(std::move(oat_file));
   return true;
 }
 
@@ -831,6 +833,8 @@
 
   QuasiAtomic::Startup();
 
+  oat_file_manager_.reset(new OatFileManager);
+
   Monitor::Init(runtime_options.GetOrDefault(Opt::LockProfThreshold),
                 runtime_options.GetOrDefault(Opt::HookIsSensitiveThread));
 
@@ -1426,6 +1430,7 @@
     // Guaranteed to have no new roots in the constant roots.
     VisitConstantRoots(visitor);
   }
+  Dbg::VisitRoots(visitor);
 }
 
 void Runtime::VisitTransactionRoots(RootVisitor* visitor) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 6154c34..abccb44 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -82,6 +82,7 @@
 class MonitorList;
 class MonitorPool;
 class NullPointerHandler;
+class OatFileManager;
 class SignalCatcher;
 class StackOverflowHandler;
 class SuspensionHandler;
@@ -573,6 +574,11 @@
   // Create a normal LinearAlloc or low 4gb version if we are 64 bit AOT compiler.
   LinearAlloc* CreateLinearAlloc();
 
+  OatFileManager& GetOatFileManager() const {
+    DCHECK(oat_file_manager_.get() != nullptr);  // Must not be called while the pointer is unset.
+    return *oat_file_manager_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -770,6 +776,9 @@
   // Contains the build fingerprint, if given as a parameter.
   std::string fingerprint_;
 
+  // Oat file manager, keeps track of what oat files are open.
+  std::unique_ptr<OatFileManager> oat_file_manager_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 1d21a64..d93a57d 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -1051,4 +1051,87 @@
   }
 }
 
+void LockCountData::AddMonitorInternal(Thread* self, mirror::Object* obj) {
+  // Two cases record nothing:
+  //  - a null reference, and
+  //  - a pending exception: an error during monitor-enter means the monitor was not locked.
+  if (obj == nullptr || self->IsExceptionPending()) {
+    return;
+  }
+
+  // The backing vector does not exist until the first lock is recorded; create it on demand.
+  std::vector<mirror::Object*>* locked = monitors_.get();
+  if (locked == nullptr) {
+    locked = new std::vector<mirror::Object*>();
+    monitors_.reset(locked);
+  }
+  locked->push_back(obj);
+}
+
+void LockCountData::RemoveMonitorInternal(Thread* self, const mirror::Object* obj) {
+  if (obj == nullptr) {
+    return;
+  }
+  if (monitors_ != nullptr) {
+    // Duplicate entries stand for recursive locks, so exactly one entry is dropped per unlock.
+    // Which duplicate goes is arbitrary; a forward scan picks the first.
+    std::vector<mirror::Object*>& locked = *monitors_;
+    for (auto entry = locked.begin(); entry != locked.end(); ++entry) {
+      if (*entry == obj) {
+        locked.erase(entry);
+        return;
+      }
+    }
+  }
+  // No recorded lock matches this unlock: raise an IllegalMonitorStateException.
+  // The order here isn't fully clear. Assume that any other pending exception is swallowed.
+  // TODO: Maybe make already pending exception a suppressed exception.
+  self->ClearException();
+  mirror::Object* unlocked = const_cast<mirror::Object*>(obj);
+  self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                           "did not lock monitor on object of type '%s' before unlocking",
+                           PrettyTypeOf(unlocked).c_str());
+}
+
+// Helper to unlock a monitor: releases |obj|'s monitor on behalf of |self| via MonitorExit.
+// Must be NO_THREAD_SAFETY_ANALYSIS, as we can't statically show that the object was locked.
+void MonitorExitHelper(Thread* self, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(self != nullptr);  // Debug-checked precondition: both arguments are non-null.
+  DCHECK(obj != nullptr);
+  obj->MonitorExit(self);
+}
+
+bool LockCountData::CheckAllMonitorsReleasedInternal(Thread* self) {
+  DCHECK(self != nullptr);
+  // Nothing was ever recorded, or every recorded lock has been matched by an unlock.
+  if (monitors_ == nullptr || monitors_->empty()) {
+    return true;
+  }
+
+  // There may be an exception pending, if the method is terminating abruptly. Clear it.
+  // TODO: Should we add this as a suppressed exception?
+  self->ClearException();
+
+  // Some monitors are still held. To enforce structured locking (and avoid deadlocks), release
+  // every one of them before raising the IllegalMonitorStateException.
+  std::vector<mirror::Object*>& still_locked = *monitors_;
+  for (auto it = still_locked.begin(); it != still_locked.end(); ++it) {
+    MonitorExitHelper(self, *it);
+    // If this raised an exception, ignore. TODO: Should we add this as suppressed
+    // exceptions?
+    if (self->IsExceptionPending()) {
+      self->ClearException();
+    }
+  }
+
+  // Raise an exception, just give the first object as the sample.
+  self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
+                           "did not unlock monitor on object of type '%s'",
+                           PrettyTypeOf(still_locked.front()).c_str());
+
+  // To make sure this path is not triggered again, clean out the monitors.
+  still_locked.clear();
+  return false;
+}
+
 }  // namespace art
diff --git a/runtime/stack.h b/runtime/stack.h
index 31acf0e..32a4765 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -21,6 +21,8 @@
 #include <string>
 
 #include "arch/instruction_set.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/object_reference.h"
@@ -66,6 +68,72 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Counts locks by storing object pointers in a vector. Duplicate entries mark recursive locks.
+// The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
+// thread roots).
+// Note: each public entry point is a template on kLockCounting that forwards to an *Internal
+//       method, so call sites may be optimized to no-ops when no lock counting is necessary.
+//       The *Internal implementations are in the cc file to avoid dependencies.
+class LockCountData {
+ public:
+  // Add the given object to the list of monitors, that is, objects that have been locked. A null
+  // object is ignored, and nothing is recorded if an exception is pending on entry. Won't throw.
+  template <bool kLockCounting>
+  void AddMonitor(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {  // Template constant, so this test can be resolved at compile time.
+      return;
+    }
+    AddMonitorInternal(self, obj);
+  }
+
+  // Try to remove the given object from the monitor list, indicating an unlock operation.
+  // This will throw an IllegalMonitorStateException (clearing any already pending exception), in
+  // case that there wasn't a lock recorded for the object. A null object is ignored.
+  template <bool kLockCounting>
+  void RemoveMonitorOrThrow(Thread* self,
+                            const mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return;
+    }
+    RemoveMonitorInternal(self, obj);
+  }
+
+  // Check whether all acquired monitors have been released. If some are still recorded, they are
+  // unlocked, any pending exception is cleared and an IllegalMonitorStateException is thrown.
+  // Returns true if the check shows that everything is OK wrt/ lock counting, false otherwise.
+  template <bool kLockCounting>
+  bool CheckAllMonitorsReleasedOrThrow(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(self != nullptr);
+    if (!kLockCounting) {
+      return true;
+    }
+    return CheckAllMonitorsReleasedInternal(self);
+  }
+
+  template <typename T, typename... Args>
+  void VisitMonitors(T visitor, Args&&... args) SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (monitors_ != nullptr) {
+      // visitor(&obj, args...) may rewrite the Object*, so the loop binds each entry by reference.
+      for (mirror::Object*& obj : *monitors_) {
+        visitor(/* inout */ &obj, std::forward<Args>(args)...);
+      }
+    }
+  }
+
+ private:
+  // Internal implementations, defined out of line; only reached when kLockCounting is true.
+  void AddMonitorInternal(Thread* self, mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
+  void RemoveMonitorInternal(Thread* self, const mirror::Object* obj)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool CheckAllMonitorsReleasedInternal(Thread* self) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Stores references to the locked-on objects. As noted, this should be visited during thread
+  // marking. Stays null until the first lock is recorded.
+  std::unique_ptr<std::vector<mirror::Object*>> monitors_;
+};
+
 // ShadowFrame has 2 possible layouts:
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
@@ -272,6 +340,10 @@
     }
   }
 
+  LockCountData& GetLockCountData() {
+    return lock_count_data_;  // Mutable access to this frame's lock-counting record.
+  }
+
   static size_t LinkOffset() {
     return OFFSETOF_MEMBER(ShadowFrame, link_);
   }
@@ -330,6 +402,7 @@
   ShadowFrame* link_;
   ArtMethod* method_;
   uint32_t dex_pc_;
+  LockCountData lock_count_data_;  // This may contain GC roots when lock counting is active.
 
   // This is a two-part array:
   //  - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 65f71ef..f1407a7 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -106,19 +106,17 @@
   UNIMPLEMENTED(FATAL);
 }
 
-void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
-                     QuickEntryPoints* qpoints);
+void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
-  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.interpreter_entrypoints);
+  uintptr_t* begin = reinterpret_cast<uintptr_t*>(&tlsPtr_.jni_entrypoints);
   uintptr_t* end = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(&tlsPtr_.quick_entrypoints) +
       sizeof(tlsPtr_.quick_entrypoints));
   for (uintptr_t* it = begin; it != end; ++it) {
     *it = reinterpret_cast<uintptr_t>(UnimplementedEntryPoint);
   }
-  InitEntryPoints(&tlsPtr_.interpreter_entrypoints, &tlsPtr_.jni_entrypoints,
-                  &tlsPtr_.quick_entrypoints);
+  InitEntryPoints(&tlsPtr_.jni_entrypoints, &tlsPtr_.quick_entrypoints);
 }
 
 void Thread::InitStringEntryPoints() {
@@ -732,6 +730,18 @@
   // a native peer!
   if (create_peer) {
     self->CreatePeer(thread_name, as_daemon, thread_group);
+    if (self->IsExceptionPending()) {
+      // We cannot keep the exception around, as we're deleting self. Try to be helpful and log it.
+      {
+        ScopedObjectAccess soa(self);
+        LOG(ERROR) << "Exception creating thread peer:";
+        LOG(ERROR) << self->GetException()->Dump();
+        self->ClearException();
+      }
+      runtime->GetThreadList()->Unregister(self);
+      // Unregister deletes self, no need to do this here.
+      return nullptr;
+    }
   } else {
     // These aren't necessary, but they improve diagnostics for unit tests & command-line tools.
     if (thread_name != nullptr) {
@@ -790,7 +800,9 @@
                                 WellKnownClasses::java_lang_Thread,
                                 WellKnownClasses::java_lang_Thread_init,
                                 thread_group, thread_name.get(), thread_priority, thread_is_daemon);
-  AssertNoPendingException();
+  if (IsExceptionPending()) {
+    return;
+  }
 
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
@@ -1538,6 +1550,7 @@
   // Finish attaching the main thread.
   ScopedObjectAccess soa(Thread::Current());
   Thread::Current()->CreatePeer("main", false, runtime->GetMainThreadGroup());
+  Thread::Current()->AssertNoPendingException();
 
   Runtime::Current()->GetClassLinker()->RunRootClinits();
 }
@@ -2367,15 +2380,6 @@
   DO_THREAD_OFFSET(ThreadSuspendTriggerOffset<ptr_size>(), "suspend_trigger")
 #undef DO_THREAD_OFFSET
 
-#define INTERPRETER_ENTRY_POINT_INFO(x) \
-    if (INTERPRETER_ENTRYPOINT_OFFSET(ptr_size, x).Uint32Value() == offset) { \
-      os << #x; \
-      return; \
-    }
-  INTERPRETER_ENTRY_POINT_INFO(pInterpreterToInterpreterBridge)
-  INTERPRETER_ENTRY_POINT_INFO(pInterpreterToCompiledCodeBridge)
-#undef INTERPRETER_ENTRY_POINT_INFO
-
 #define JNI_ENTRY_POINT_INFO(x) \
     if (JNI_ENTRYPOINT_OFFSET(ptr_size, x).Uint32Value() == offset) { \
       os << #x; \
@@ -2667,6 +2671,8 @@
         }
       }
     }
+    // Mark lock count map required for structured locking checks.
+    shadow_frame->GetLockCountData().VisitMonitors(visitor_, -1, this);
   }
 
  private:
diff --git a/runtime/thread.h b/runtime/thread.h
index d262c62..8cea10c 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -30,7 +30,6 @@
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "entrypoints/interpreter/interpreter_entrypoints.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "globals.h"
@@ -580,12 +579,6 @@
   }
 
   template<size_t pointer_size>
-  static ThreadOffset<pointer_size> InterpreterEntryPointOffset(size_t interp_entrypoint_offset) {
-    return ThreadOffsetFromTlsPtr<pointer_size>(
-        OFFSETOF_MEMBER(tls_ptr_sized_values, interpreter_entrypoints) + interp_entrypoint_offset);
-  }
-
-  template<size_t pointer_size>
   static ThreadOffset<pointer_size> JniEntryPointOffset(size_t jni_entrypoint_offset) {
     return ThreadOffsetFromTlsPtr<pointer_size>(
         OFFSETOF_MEMBER(tls_ptr_sized_values, jni_entrypoints) + jni_entrypoint_offset);
@@ -1329,7 +1322,6 @@
 
     // Entrypoint function pointers.
     // TODO: move this to more of a global offset table model to avoid per-thread duplication.
-    InterpreterEntryPoints interpreter_entrypoints;
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 3e61824..19cc462 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -286,8 +286,9 @@
   }
 };
 
-template <typename Alloc>
-void Push32(std::vector<uint8_t, Alloc>* buf, int32_t data) {
+template <typename Vector>
+void Push32(Vector* buf, int32_t data) {
+  static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
   buf->push_back(data & 0xff);
   buf->push_back((data >> 8) & 0xff);
   buf->push_back((data >> 16) & 0xff);
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index dd88db0..ee3a3b9 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -120,7 +120,7 @@
     private static void testRemoveLocalObject() {
         removeLocalObject(new Object());
     }
-
+    
     private static native short shortMethod(short s1, short s2, short s3, short s4, short s5, short s6, short s7,
         short s8, short s9, short s10);
 
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index 2188055..d742b14 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -27,6 +27,13 @@
      */
     public static void main(String[] args) {
         System.loadLibrary(args[0]);
+        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
+            // Some tests ensure that the verifier was able to guarantee balanced locking by
+            // asserting that the test function is running as compiled code. But skip this now,
+            // as this seems to be a non-compiled code test configuration.
+            disableStackFrameAsserts();
+        }
+
         Main m = new Main();
 
         m.recursiveSync(0);
@@ -49,7 +56,7 @@
         Object obj1 = new Object();
         Object obj2 = new Object();
 
-        m.twoPath(obj1, obj2, 0);
+        TwoPath.twoPath(obj1, obj2, 0);
         System.out.println("twoPath ok");
 
         m.triplet(obj1, obj2, 0);
@@ -62,6 +69,7 @@
      * Recursive synchronized method.
      */
     synchronized void recursiveSync(int iter) {
+        assertIsManaged();
         if (iter < 40) {
             recursiveSync(iter+1);
         } else {
@@ -73,6 +81,7 @@
      * Tests simple nesting, with and without a throw.
      */
     void nestedMayThrow(boolean doThrow) {
+        assertIsManaged();
         synchronized (this) {
             synchronized (Main.class) {
                 synchronized (new Object()) {
@@ -90,6 +99,7 @@
      * Exercises bug 3215458.
      */
     void constantLock() {
+        assertIsManaged();
         Class thing = Thread.class;
         synchronized (Thread.class) {}
     }
@@ -98,6 +108,7 @@
      * Confirms that we can have 32 nested monitors on one method.
      */
     void notExcessiveNesting() {
+        assertIsManaged();
         synchronized (this) {   // 1
         synchronized (this) {   // 2
         synchronized (this) {   // 3
@@ -138,6 +149,7 @@
      * method.
      */
     void notNested() {
+        assertIsManaged();
         synchronized (this) {}  // 1
         synchronized (this) {}  // 2
         synchronized (this) {}  // 3
@@ -178,25 +190,6 @@
     private void doNothing(Object obj) {}
 
     /**
-     * Conditionally uses one of the synchronized objects.
-     */
-    public void twoPath(Object obj1, Object obj2, int x) {
-        Object localObj;
-
-        synchronized (obj1) {
-            synchronized(obj2) {
-                if (x == 0) {
-                    localObj = obj2;
-                } else {
-                    localObj = obj1;
-                }
-            }
-        }
-
-        doNothing(localObj);
-    }
-
-    /**
      * Lock the monitor two or three times, and make use of the locked or
      * unlocked object.
      */
@@ -220,17 +213,12 @@
 
     // Smali testing code.
     private static void runSmaliTests() {
-        if (!hasOatFile() || runtimeIsSoftFail() || isInterpreted()) {
-            // Skip test, this seems to be a non-compiled code test configuration.
-            return;
-        }
-
         runTest("OK", new Object[] { new Object(), new Object() }, null);
         runTest("TooDeep", new Object[] { new Object() }, null);
         runTest("NotStructuredOverUnlock", new Object[] { new Object() },
                 IllegalMonitorStateException.class);
-        runTest("NotStructuredUnderUnlock", new Object[] { new Object() }, null);
-                // TODO: new IllegalMonitorStateException());
+        runTest("NotStructuredUnderUnlock", new Object[] { new Object() },
+                IllegalMonitorStateException.class);
         runTest("UnbalancedJoin", new Object[] { new Object(), new Object() }, null);
         runTest("UnbalancedStraight", new Object[] { new Object(), new Object() }, null);
     }
@@ -282,4 +270,5 @@
     public static native boolean hasOatFile();
     public static native boolean runtimeIsSoftFail();
     public static native boolean isInterpreted();
+    public static native void disableStackFrameAsserts();
 }
diff --git a/test/088-monitor-verification/src/TwoPath.java b/test/088-monitor-verification/src/TwoPath.java
new file mode 100644
index 0000000..2542de7
--- /dev/null
+++ b/test/088-monitor-verification/src/TwoPath.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+/*
+ * Test case for conditionally using one of two synchronized objects.
+ *
+ * This code cannot be verified at the moment, as the join point merges a register with two
+ * different lock options. Do not put it into Main to avoid the whole class being run in the
+ * interpreter.
+ */
+public class TwoPath {
+
+    /**
+     * Locks obj1 then obj2, picks obj2 (x == 0) or obj1 inside both locks, uses it after release.
+     */
+    public static void twoPath(Object obj1, Object obj2, int x) {
+        Object localObj;
+
+        synchronized (obj1) {
+            synchronized(obj2) {
+                if (x == 0) {
+                    localObj = obj2;
+                } else {
+                    localObj = obj1;
+                }
+            }
+        }
+
+        doNothing(localObj);
+    }
+
+    private static void doNothing(Object o) {  // Intentionally empty: only consumes the reference.
+    }
+}
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 134abd1..f1885de 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -537,6 +537,17 @@
     return ((SubclassA)a).toString();
   }
 
+
+  /// CHECK-START: void Main.argumentCheck(Super, double, SubclassA, Final) reference_type_propagation (after)
+  /// CHECK:      ParameterValue klass:Main can_be_null:false exact:false
+  /// CHECK:      ParameterValue klass:Super can_be_null:true exact:false
+  /// CHECK:      ParameterValue
+  /// CHECK:      ParameterValue klass:SubclassA can_be_null:true exact:false
+  /// CHECK:      ParameterValue klass:Final can_be_null:true exact:true
+  /// CHECK-NOT:  ParameterValue
+  private void argumentCheck(Super s, double d, SubclassA a, Final f) {
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/536-checker-intrinsic-optimization/expected.txt b/test/536-checker-intrinsic-optimization/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/expected.txt
diff --git a/test/536-checker-intrinsic-optimization/info.txt b/test/536-checker-intrinsic-optimization/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/info.txt
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
new file mode 100644
index 0000000..1b784ae
--- /dev/null
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static void main(String[] args) {  // Runs only two of the methods; results are ignored.
+    stringEqualsSame();
+    stringArgumentNotNull("Foo");
+  }
+
+  /// CHECK-START: boolean Main.stringEqualsSame() instruction_simplifier (before)
+  /// CHECK:      InvokeStaticOrDirect
+
+  /// CHECK-START: boolean Main.stringEqualsSame() register (before)
+  /// CHECK:      <<Const1:i\d+>> IntConstant 1
+  /// CHECK:      Return [<<Const1>>]
+
+  /// CHECK-START: boolean Main.stringEqualsSame() register (before)
+  /// CHECK-NOT:  InvokeStaticOrDirect
+  public static boolean stringEqualsSame() {
+    return $inline$callStringEquals("obj", "obj");
+  }
+
+  /// CHECK-START: boolean Main.stringEqualsNull() register (after)
+  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      Return [<<Invoke>>]
+  public static boolean stringEqualsNull() {
+    String o = (String)myObject;
+    return $inline$callStringEquals(o, o);
+  }
+
+  public static boolean $inline$callStringEquals(String a, String b) {
+    return a.equals(b);
+  }
+
+  /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK-NOT:      test
+  public static boolean stringArgumentNotNull(Object obj) {
+    obj.getClass();  // NOTE(review): presumably makes obj known non-null before equals() - confirm.
+    return "foo".equals(obj);
+  }
+
+  // Test is very brittle as it depends on the order we emit instructions.
+  /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      test
+  /// CHECK:      jz/eq
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:  mov
+  /// CHECK:      cmp
+  public static boolean stringArgumentIsString() {
+    return "foo".equals(myString);
+  }
+
+  static String myString;  // Never assigned in this class, so it stays null.
+  static Object myObject;  // Never assigned in this class, so it stays null.
+}
diff --git a/test/536-checker-needs-access-check/expected.txt b/test/536-checker-needs-access-check/expected.txt
new file mode 100644
index 0000000..4acae95
--- /dev/null
+++ b/test/536-checker-needs-access-check/expected.txt
@@ -0,0 +1,4 @@
+Got expected error instanceof
+Got expected error instanceof null
+Got expected error checkcast null
+Got expected error instanceof (keep LoadClass with access check)
diff --git a/test/536-checker-needs-access-check/info.txt b/test/536-checker-needs-access-check/info.txt
new file mode 100644
index 0000000..3413cf3
--- /dev/null
+++ b/test/536-checker-needs-access-check/info.txt
@@ -0,0 +1 @@
+Verifies that we don't remove type checks when we need to check for access rights.
diff --git a/test/536-checker-needs-access-check/src/Main.java b/test/536-checker-needs-access-check/src/Main.java
new file mode 100644
index 0000000..7bd49c1
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/Main.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import other.InaccessibleClass;
+import other.InaccessibleClassProxy;
+
+public class Main {
+    public static void main(String[] args) {  // expected.txt lists all four messages printed below.
+        try {
+            testInstanceOf();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof");
+        }
+
+         try {
+            testInstanceOfNull();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof null");
+        }
+
+        try {
+            testCheckCastNull();
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error checkcast null");
+        }
+
+        try {
+            testDontGvnLoadClassWithAccessChecks(new Object());
+        } catch (IllegalAccessError e) {
+            System.out.println("Got expected error instanceof (keep LoadClass with access check)");
+        }
+    }
+
+    /// CHECK-START: boolean Main.testInstanceOf() register (after)
+    /// CHECK: InstanceOf
+    public static boolean testInstanceOf() {
+        return ic instanceof InaccessibleClass;
+    }
+
+    /// CHECK-START: boolean Main.testInstanceOfNull() register (after)
+    /// CHECK: InstanceOf
+    public static boolean testInstanceOfNull() {
+        return null instanceof InaccessibleClass;
+    }
+
+    // TODO: write a test for CheckCast with a non-null constant (after RTP can parse arguments).
+
+    /// CHECK-START: other.InaccessibleClass Main.testCheckCastNull() register (after)
+    /// CHECK: CheckCast
+    public static InaccessibleClass testCheckCastNull() {
+        return (InaccessibleClass) null;
+    }
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) inliner (before)
+    /// CHECK: InvokeStaticOrDirect
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) inliner (after)
+    /// CHECK-NOT: InvokeStaticOrDirect
+
+    /// CHECK-START: boolean Main.testDontGvnLoadClassWithAccessChecks(java.lang.Object) GVN (after)
+    /// CHECK: LoadClass needs_access_check:false
+    /// CHECK: LoadClass needs_access_check:true
+    public static boolean testDontGvnLoadClassWithAccessChecks(Object o) {
+        InaccessibleClassProxy.test(o);  // Same instanceof, but issued from inside package other.
+        return ic instanceof InaccessibleClass;
+    }
+
+    public static InaccessibleClass ic;  // Never assigned in this class, so it stays null.
+}
diff --git a/test/536-checker-needs-access-check/src/other/InaccessibleClass.java b/test/536-checker-needs-access-check/src/other/InaccessibleClass.java
new file mode 100644
index 0000000..de2e1d7
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClass {
+}
diff --git a/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java b/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java
new file mode 100644
index 0000000..4c005e4
--- /dev/null
+++ b/test/536-checker-needs-access-check/src/other/InaccessibleClassProxy.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClassProxy {
+  public static boolean test(Object o) {
+    return o instanceof InaccessibleClass;
+  }
+}
diff --git a/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java b/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java
new file mode 100644
index 0000000..2732263
--- /dev/null
+++ b/test/536-checker-needs-access-check/src2/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+/*package*/ class InaccessibleClass {
+}
diff --git a/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java b/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java
new file mode 100644
index 0000000..4c005e4
--- /dev/null
+++ b/test/536-checker-needs-access-check/src2/other/InaccessibleClassProxy.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClassProxy {
+  public static boolean test(Object o) {
+    return o instanceof InaccessibleClass;
+  }
+}
diff --git a/test/537-checker-arraycopy/expected.txt b/test/537-checker-arraycopy/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-arraycopy/expected.txt
diff --git a/test/537-checker-arraycopy/info.txt b/test/537-checker-arraycopy/info.txt
new file mode 100644
index 0000000..ea88f89
--- /dev/null
+++ b/test/537-checker-arraycopy/info.txt
@@ -0,0 +1 @@
+Test for edge cases of System.arraycopy.
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
new file mode 100644
index 0000000..30ccc56
--- /dev/null
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Exercises System.arraycopy with valid, null, non-array and out-of-bounds arguments.
+public class Main {
+  public static void main(String[] args) {
+    arraycopy();
+    try {
+      arraycopy(new Object());
+      throw new Error("Should not be here");
+    } catch (ArrayStoreException ase) {
+      // Expected: the argument is a plain Object, not an array.
+    }
+    try {
+      arraycopy(null);
+      throw new Error("Should not be here");
+    } catch (NullPointerException npe) {
+      // Expected: source and destination are both null.
+    }
+
+    try {
+      arraycopy(new Object[1]);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Expected: reading one element at index 1 overruns a length-1 array.
+    }
+
+    arraycopy(new Object[2]);
+    arraycopy(new Object[2], 0);
+
+    try {
+      arraycopy(new Object[1], 1);
+      throw new Error("Should not be here");
+    } catch (ArrayIndexOutOfBoundsException aiooe) {
+      // Expected: source position 1 plus length 1 overruns a length-1 array.
+    }
+  }
+
+  /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK-NOT:      test
+  /// CHECK-NOT:      call
+  /// CHECK:          ReturnVoid
+  // Checks that the call is intrinsified and that there is no test instruction
+  // when we know the source and destination are not null.
+  public static void arraycopy() {
+    Object[] obj = new Object[4];
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
+  public static void arraycopy(Object obj) {
+    System.arraycopy(obj, 1, obj, 0, 1);
+  }
+
+  public static void arraycopy(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, 0, obj.length);
+  }
+}
diff --git a/test/537-checker-debuggable/expected.txt b/test/537-checker-debuggable/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-debuggable/expected.txt
diff --git a/test/537-checker-debuggable/info.txt b/test/537-checker-debuggable/info.txt
new file mode 100644
index 0000000..25597d3
--- /dev/null
+++ b/test/537-checker-debuggable/info.txt
@@ -0,0 +1 @@
+Test that CHECK-START-DEBUGGABLE groups run only on --debuggable code, and plain CHECK-START groups only on non-debuggable code.
diff --git a/test/537-checker-debuggable/smali/TestCase.smali b/test/537-checker-debuggable/smali/TestCase.smali
new file mode 100644
index 0000000..8e6c7ef
--- /dev/null
+++ b/test/537-checker-debuggable/smali/TestCase.smali
@@ -0,0 +1,42 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+
+.super Ljava/lang/Object;
+
+# The phi in this method has no actual uses but one environment use. It will
+# be eliminated in normal mode but kept live in debuggable mode. Test that
+# Checker runs the correct test for each compilation mode.
+
+## CHECK-START: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK-NOT:         Phi
+
+## CHECK-START-DEBUGGABLE: int TestCase.deadPhi(int, int, int) ssa_builder (after)
+## CHECK:             Phi
+
+.method public static deadPhi(III)I
+  .registers 8
+
+  move v0, p1
+  if-eqz p0, :after
+  move v0, p2
+  :after
+  # v0 = Phi [p1, p2] with no uses
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J  # create an env use
+
+  :return
+  return p2
+.end method
diff --git a/test/537-checker-debuggable/src/Main.java b/test/537-checker-debuggable/src/Main.java
new file mode 100644
index 0000000..a572648
--- /dev/null
+++ b/test/537-checker-debuggable/src/Main.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) { }
+}
diff --git a/test/537-checker-inline-and-unverified/expected.txt b/test/537-checker-inline-and-unverified/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/expected.txt
diff --git a/test/537-checker-inline-and-unverified/info.txt b/test/537-checker-inline-and-unverified/info.txt
new file mode 100644
index 0000000..ec12327
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/info.txt
@@ -0,0 +1 @@
+Checks that unverified methods are not inlined.
diff --git a/test/537-checker-inline-and-unverified/src/Main.java b/test/537-checker-inline-and-unverified/src/Main.java
new file mode 100644
index 0000000..bdc14b0
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src/Main.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import other.InaccessibleClass;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            testNoInline();
+        } catch (IllegalAccessError e) {
+            // Expected if the package-private src2 InaccessibleClass is the one loaded.
+        }
+        testInline();
+    }
+
+    /// CHECK-START: void Main.testNoInline() inliner (before)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$noinline$testNoInline
+
+    /// CHECK-START: void Main.testNoInline() inliner (after)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$noinline$testNoInline
+    public static void testNoInline() {
+        $opt$noinline$testNoInline();
+    }
+
+    /// CHECK-START: void Main.testInline() inliner (before)
+    /// CHECK: InvokeStaticOrDirect method_name:Main.$opt$inline$testInline
+
+    /// CHECK-START: void Main.testInline() inliner (after)
+    /// CHECK-NOT: InvokeStaticOrDirect
+    public static void testInline() {
+        $opt$inline$testInline();
+    }
+
+    public static boolean $opt$noinline$testNoInline() {
+        try {
+            return null instanceof InaccessibleClass;
+        } catch (IllegalAccessError e) {
+            // Expected if the package-private src2 InaccessibleClass is the one loaded.
+        }
+        return false;
+    }
+
+    public static boolean $opt$inline$testInline() {
+        return null instanceof Main;
+    }
+}
diff --git a/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java b/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java
new file mode 100644
index 0000000..de2e1d7
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+public class InaccessibleClass {
+}
diff --git a/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java b/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java
new file mode 100644
index 0000000..ff11d7a
--- /dev/null
+++ b/test/537-checker-inline-and-unverified/src2/other/InaccessibleClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package other;
+
+/* package */ class InaccessibleClass {
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index db16b97..537873f 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -469,6 +469,8 @@
     530-checker-regression-reftype-final \
     532-checker-nonnull-arrayset \
     534-checker-bce-deoptimization \
+    536-checker-intrinsic-optimization \
+    537-checker-debuggable \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -519,8 +521,10 @@
 
 # Tests that should fail in the read barrier configuration.
 # 137: Read barrier forces interpreter. Cannot run this with the interpreter.
+# 141: Class unloading test is flaky with CC since CC seems to occasionally keep class loaders live.
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  141-class-unload
 
 ifeq ($(ART_USE_READ_BARRIER),true)
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
diff --git a/test/run-test b/test/run-test
index a5b6e92..2892ce9 100755
--- a/test/run-test
+++ b/test/run-test
@@ -637,18 +637,24 @@
   # on a particular DEX output, keep building them with dx for now (b/19467889).
   USE_JACK="false"
 
-  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" -a "$debuggable" = "no" ]; then
+  if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" ]; then
     # In no-prebuild mode, the compiler is only invoked if both dex2oat and
     # patchoat are available. Disable Checker otherwise (b/22552692).
     if [ "$prebuild_mode" = "yes" ] || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
       run_checker="yes"
+
       if [ "$target_mode" = "no" ]; then
         cfg_output_dir="$tmp_dir"
-        checker_arch_option="--arch=${host_arch_name^^}"
+        checker_args="--arch=${host_arch_name^^}"
       else
         cfg_output_dir="$DEX_LOCATION"
-        checker_arch_option="--arch=${target_arch_name^^}"
+        checker_args="--arch=${target_arch_name^^}"
       fi
+
+      if [ "$debuggable" = "yes" ]; then
+        checker_args="$checker_args --debuggable"
+      fi
+
       run_args="${run_args} -Xcompiler-option --dump-cfg=$cfg_output_dir/$cfg_output \
                             -Xcompiler-option -j1"
     fi
@@ -702,7 +708,7 @@
                 if [ "$target_mode" = "yes" ]; then
                   adb pull $cfg_output_dir/$cfg_output &> /dev/null
                 fi
-                "$checker" $checker_arch_option "$cfg_output" "$tmp_dir" 2>&1
+                "$checker" $checker_args "$cfg_output" "$tmp_dir" 2>&1
                 checker_exit="$?"
                 if [ "$checker_exit" = "0" ]; then
                     good="yes"
@@ -727,7 +733,7 @@
           if [ "$target_mode" = "yes" ]; then
             adb pull $cfg_output_dir/$cfg_output &> /dev/null
           fi
-          "$checker" -q $checker_arch_option "$cfg_output" "$tmp_dir" >> "$output" 2>&1
+          "$checker" -q $checker_args "$cfg_output" "$tmp_dir" >> "$output" 2>&1
         fi
         sed -e 's/[[:cntrl:]]$//g' < "$output" >"${td_expected}"
         good="yes"
@@ -768,7 +774,7 @@
             if [ "$target_mode" = "yes" ]; then
               adb pull $cfg_output_dir/$cfg_output &> /dev/null
             fi
-            "$checker" -q $checker_arch_option "$cfg_output" "$tmp_dir" >> "$output" 2>&1
+            "$checker" -q $checker_args "$cfg_output" "$tmp_dir" >> "$output" 2>&1
             checker_exit="$?"
             if [ "$checker_exit" != "0" ]; then
                 echo "checker exit status: $checker_exit" 1>&2
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index a8e3884..1083c2f 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -31,9 +31,6 @@
       another) How about, always sort by name?
  * For long strings, limit the string length shown in the summary view to
    something reasonable.  Say 50 chars, then add a "..." at the end.
- * For string summaries, if the string is an offset into a bigger byte array,
-   make sure to show just the part that's in the bigger byte array, not the
-   entire byte array.
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
  * Should help be part of an AhatHandler, that automatically gets the menu and
@@ -72,6 +69,8 @@
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
    showing all the instances.
+ * That InstanceUtils.asString properly takes into account "offset" and
+   "count" fields, if they are present.
 
 Reported Issues:
  * Request to be able to sort tables by size.
diff --git a/tools/ahat/src/InstanceUtils.java b/tools/ahat/src/InstanceUtils.java
index a6ac3b8..eb9e363 100644
--- a/tools/ahat/src/InstanceUtils.java
+++ b/tools/ahat/src/InstanceUtils.java
@@ -37,22 +37,6 @@
   }
 
   /**
-   * Read the char[] value from an hprof Instance.
-   * Returns null if the object can't be interpreted as a char[].
-   */
-  private static char[] asCharArray(Instance inst) {
-    if (! (inst instanceof ArrayInstance)) {
-      return null;
-    }
-
-    ArrayInstance array = (ArrayInstance) inst;
-    if (array.getArrayType() != Type.CHAR) {
-      return null;
-    }
-    return array.asCharArray(0, array.getValues().length);
-  }
-
-  /**
    * Read the byte[] value from an hprof Instance.
    * Returns null if the instance is not a byte array.
    */
@@ -82,8 +66,32 @@
     if (!isInstanceOfClass(inst, "java.lang.String")) {
       return null;
     }
-    char[] value = getCharArrayField(inst, "value");
-    return (value == null) ? null : new String(value);
+
+    Object value = getField(inst, "value");
+    if (!(value instanceof ArrayInstance)) {
+      return null;
+    }
+
+    ArrayInstance chars = (ArrayInstance) value;
+    if (chars.getArrayType() != Type.CHAR) {
+      return null;
+    }
+
+    // TODO: When perflib provides a better way to get the length of the
+    // array, we should use that here.
+    int numChars = chars.getValues().length;
+    int count = getIntField(inst, "count", numChars);
+    int offset = getIntField(inst, "offset", 0);
+    int end = offset + count - 1;
+
+    if (count == 0) {
+      return "";
+    }
+    // A malformed dump may carry a negative count; refuse it instead of slicing.
+    if (count > 0 && offset >= 0 && offset < numChars && end >= 0 && end < numChars) {
+      return new String(chars.asCharArray(offset, count));
+    }
+    return null;
   }
 
   /**
@@ -175,6 +183,15 @@
   }
 
   /**
+   * Fetch the named int field from an instance. When the two-argument
+   * getIntField lookup yields null, the supplied default is returned instead.
+   */
+  private static int getIntField(Instance inst, String fieldName, int def) {
+    Integer boxed = getIntField(inst, fieldName);
+    return (boxed != null) ? boxed : def;
+  }
+
+  /**
    * Read the given field from the given instance.
    * The field is assumed to be a byte[] field.
    * Returns null if the field value is null, not a byte[] or could not be read.
@@ -187,14 +204,6 @@
     return asByteArray((Instance)value);
   }
 
-  private static char[] getCharArrayField(Instance inst, String fieldName) {
-    Object value = getField(inst, fieldName);
-    if (!(value instanceof Instance)) {
-      return null;
-    }
-    return asCharArray((Instance)value);
-  }
-
   // Return the bitmap instance associated with this object, or null if there
   // is none. This works for android.graphics.Bitmap instances and their
   // underlying Byte[] instances.
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index de9b35d..631e0a0 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,7 +19,9 @@
   exit 1
 fi
 
-common_targets="vogar vogar.jar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests"
+out_dir=${OUT_DIR:-out}  # ':-' also falls back to "out" when OUT_DIR is set but empty
+java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
+common_targets="vogar vogar.jar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
@@ -44,9 +46,9 @@
 done
 
 if [[ $mode == "host" ]]; then
-  make_command="make $j_arg $showcommands build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so"
+  make_command="make $j_arg $showcommands build-art-host-tests $common_targets ${out_dir}/host/linux-x86/lib/libjavacoretests.so ${out_dir}/host/linux-x86/lib64/libjavacoretests.so"
 elif [[ $mode == "target" ]]; then
-  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh out/host/linux-x86/bin/adb"
+  make_command="make $j_arg $showcommands build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh ${out_dir}/host/linux-x86/bin/adb"
 fi
 
 echo "Executing $make_command"
diff --git a/tools/checker/checker.py b/tools/checker/checker.py
index bc5e17d..2e9faba 100755
--- a/tools/checker/checker.py
+++ b/tools/checker/checker.py
@@ -36,7 +36,9 @@
   parser.add_argument("--dump-pass", dest="dump_pass", metavar="PASS",
                       help="print a compiler pass dump")
   parser.add_argument("--arch", dest="arch", choices=archs_list,
-                      help="Run the tests for the specified target architecture.")
+                      help="Run tests for the specified target architecture.")
+  parser.add_argument("--debuggable", action="store_true",
+                      help="Run tests for debuggable code.")
   parser.add_argument("-q", "--quiet", action="store_true",
                       help="print only errors")
   return parser.parse_args()
@@ -83,13 +85,13 @@
     Logger.fail("Source path \"" + path + "\" not found")
 
 
-def RunTests(checkPrefix, checkPath, outputFilename, targetArch):
+def RunTests(checkPrefix, checkPath, outputFilename, targetArch, debuggableMode):
   c1File = ParseC1visualizerStream(os.path.basename(outputFilename), open(outputFilename, "r"))
   for checkFilename in FindCheckerFiles(checkPath):
     checkerFile = ParseCheckerStream(os.path.basename(checkFilename),
                                      checkPrefix,
                                      open(checkFilename, "r"))
-    MatchFiles(checkerFile, c1File, targetArch)
+    MatchFiles(checkerFile, c1File, targetArch, debuggableMode)
 
 
 if __name__ == "__main__":
@@ -103,4 +105,4 @@
   elif args.dump_pass:
     DumpPass(args.tested_file, args.dump_pass)
   else:
-    RunTests(args.check_prefix, args.source_path, args.tested_file, args.arch)
+    RunTests(args.check_prefix, args.source_path, args.tested_file, args.arch, args.debuggable)
diff --git a/tools/checker/file_format/checker/parser.py b/tools/checker/file_format/checker/parser.py
index 446302f..f199a50 100644
--- a/tools/checker/file_format/checker/parser.py
+++ b/tools/checker/file_format/checker/parser.py
@@ -22,7 +22,7 @@
 def __isCheckerLine(line):
   return line.startswith("///") or line.startswith("##")
 
-def __extractLine(prefix, line, arch = None):
+def __extractLine(prefix, line, arch = None, debuggable = False):
   """ Attempts to parse a check line. The regex searches for a comment symbol
       followed by the CHECK keyword, given attribute and a colon at the very
       beginning of the line. Whitespaces are ignored.
@@ -30,10 +30,11 @@
   rIgnoreWhitespace = r"\s*"
   rCommentSymbols = [r"///", r"##"]
   arch_specifier = r"-%s" % arch if arch is not None else r""
+  dbg_specifier = r"-DEBUGGABLE" if debuggable else r""
   regexPrefix = rIgnoreWhitespace + \
                 r"(" + r"|".join(rCommentSymbols) + r")" + \
                 rIgnoreWhitespace + \
-                prefix + arch_specifier + r":"
+                prefix + arch_specifier + dbg_specifier + r":"
 
   # The 'match' function succeeds only if the pattern is matched at the
   # beginning of the line.
@@ -56,10 +57,11 @@
 
   # Lines beginning with 'CHECK-START' start a new test case.
   # We currently only consider the architecture suffix in "CHECK-START" lines.
-  for arch in [None] + archs_list:
-    startLine = __extractLine(prefix + "-START", line, arch)
-    if startLine is not None:
-      return None, startLine, arch
+  for debuggable in [True, False]:
+    for arch in [None] + archs_list:
+      startLine = __extractLine(prefix + "-START", line, arch, debuggable)
+      if startLine is not None:
+        return None, startLine, (arch, debuggable)
 
   # Lines starting only with 'CHECK' are matched in order.
   plainLine = __extractLine(prefix, line)
@@ -167,9 +169,11 @@
   fnProcessLine = lambda line, lineNo: __processLine(line, lineNo, prefix, fileName)
   fnLineOutsideChunk = lambda line, lineNo: \
       Logger.fail("Checker line not inside a group", fileName, lineNo)
-  for caseName, caseLines, startLineNo, testArch in \
+  for caseName, caseLines, startLineNo, testData in \
       SplitStream(stream, fnProcessLine, fnLineOutsideChunk):
-    testCase = TestCase(checkerFile, caseName, startLineNo, testArch)
+    testArch = testData[0]
+    forDebuggable = testData[1]
+    testCase = TestCase(checkerFile, caseName, startLineNo, testArch, forDebuggable)
     for caseLine in caseLines:
       ParseCheckerAssertion(testCase, caseLine[0], caseLine[1], caseLine[2])
   return checkerFile
diff --git a/tools/checker/file_format/checker/struct.py b/tools/checker/file_format/checker/struct.py
index 7ee09cd..a31aa54 100644
--- a/tools/checker/file_format/checker/struct.py
+++ b/tools/checker/file_format/checker/struct.py
@@ -36,7 +36,7 @@
 
 class TestCase(PrintableMixin):
 
-  def __init__(self, parent, name, startLineNo, testArch = None):
+  def __init__(self, parent, name, startLineNo, testArch = None, forDebuggable = False):
     assert isinstance(parent, CheckerFile)
 
     self.parent = parent
@@ -44,6 +44,7 @@
     self.assertions = []
     self.startLineNo = startLineNo
     self.testArch = testArch
+    self.forDebuggable = forDebuggable
 
     if not self.name:
       Logger.fail("Test case does not have a name", self.fileName, self.startLineNo)
diff --git a/tools/checker/file_format/checker/test.py b/tools/checker/file_format/checker/test.py
index 495dabc..579c190 100644
--- a/tools/checker/file_format/checker/test.py
+++ b/tools/checker/file_format/checker/test.py
@@ -290,7 +290,7 @@
           /// CHECK-NEXT: bar
         """)
 
-class CheckerParser_ArchTests(unittest.TestCase):
+class CheckerParser_SuffixTests(unittest.TestCase):
 
   noarch_block = """
                   /// CHECK-START: Group
@@ -308,11 +308,12 @@
                   /// CHECK-DAG:   yoyo
                 """
 
+  def parse(self, checkerText):
+    return ParseCheckerStream("<test_file>", "CHECK", io.StringIO(ToUnicode(checkerText)))
+
   def test_NonArchTests(self):
     for arch in [None] + archs_list:
-      checkerFile = ParseCheckerStream("<test-file>",
-                                       "CHECK",
-                                       io.StringIO(ToUnicode(self.noarch_block)))
+      checkerFile = self.parse(self.noarch_block)
       self.assertEqual(len(checkerFile.testCases), 1)
       self.assertEqual(len(checkerFile.testCases[0].assertions), 4)
 
@@ -320,9 +321,7 @@
     for targetArch in archs_list:
       for testArch in [a for a in archs_list if a != targetArch]:
         checkerText = self.arch_block.format(test_arch = testArch)
-        checkerFile = ParseCheckerStream("<test-file>",
-                                         "CHECK",
-                                         io.StringIO(ToUnicode(checkerText)))
+        checkerFile = self.parse(checkerText)
         self.assertEqual(len(checkerFile.testCases), 1)
         self.assertEqual(len(checkerFile.testCasesForArch(testArch)), 1)
         self.assertEqual(len(checkerFile.testCasesForArch(targetArch)), 0)
@@ -330,13 +329,42 @@
   def test_Arch(self):
     for arch in archs_list:
       checkerText = self.arch_block.format(test_arch = arch)
-      checkerFile = ParseCheckerStream("<test-file>",
-                                       "CHECK",
-                                       io.StringIO(ToUnicode(checkerText)))
+      checkerFile = self.parse(checkerText)
       self.assertEqual(len(checkerFile.testCases), 1)
       self.assertEqual(len(checkerFile.testCasesForArch(arch)), 1)
       self.assertEqual(len(checkerFile.testCases[0].assertions), 4)
 
+  def test_NoDebugAndArch(self):
+    testCase = self.parse("""
+        /// CHECK-START: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertFalse(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, None)
+
+  def test_SetDebugNoArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-DEBUGGABLE: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertTrue(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, None)
+
+  def test_NoDebugSetArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-ARM: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertFalse(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, "ARM")
+
+  def test_SetDebugAndArch(self):
+    testCase = self.parse("""
+        /// CHECK-START-ARM-DEBUGGABLE: Group
+        /// CHECK: foo
+        """).testCases[0]
+    self.assertTrue(testCase.forDebuggable)
+    self.assertEqual(testCase.testArch, "ARM")
 
 class CheckerParser_EvalTests(unittest.TestCase):
   def parseTestCase(self, string):
diff --git a/tools/checker/match/file.py b/tools/checker/match/file.py
index 6601a1e..3ded074 100644
--- a/tools/checker/match/file.py
+++ b/tools/checker/match/file.py
@@ -159,10 +159,13 @@
     matchFrom = match.scope.end + 1
     variables = match.variables
 
-def MatchFiles(checkerFile, c1File, targetArch):
+def MatchFiles(checkerFile, c1File, targetArch, debuggableMode):
   for testCase in checkerFile.testCases:
     if testCase.testArch not in [None, targetArch]:
       continue
+    if testCase.forDebuggable != debuggableMode:
+      continue
+
     # TODO: Currently does not handle multiple occurrences of the same group
     # name, e.g. when a pass is run multiple times. It will always try to
     # match a check group against the first output group of the same name.
diff --git a/tools/checker/run_unit_tests.py b/tools/checker/run_unit_tests.py
index 2e8f208..a0d274d 100755
--- a/tools/checker/run_unit_tests.py
+++ b/tools/checker/run_unit_tests.py
@@ -19,7 +19,7 @@
 from file_format.checker.test      import CheckerParser_PrefixTest, \
                                           CheckerParser_TestExpressionTest, \
                                           CheckerParser_FileLayoutTest, \
-                                          CheckerParser_ArchTests, \
+                                          CheckerParser_SuffixTests, \
                                           CheckerParser_EvalTests
 from match.test                    import MatchLines_Test, \
                                           MatchFiles_Test
diff --git a/tools/dmtracedump/Android.mk b/tools/dmtracedump/Android.mk
new file mode 100644
index 0000000..da0d632
--- /dev/null
+++ b/tools/dmtracedump/Android.mk
@@ -0,0 +1,32 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Java method trace dump tool
+
+LOCAL_PATH:= $(call my-dir)
+
+# Host executable "dmtracedump", built from tracedump.cc with optimization
+# disabled (-O0), debug info (-g) and -Wall.  Its host OS list is
+# darwin, linux and windows.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := tracedump.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE_HOST_OS := darwin linux windows
+LOCAL_MODULE := dmtracedump
+include $(BUILD_HOST_EXECUTABLE)
+
+# Host executable "create_test_dmtrace", built from createtesttrace.cc with
+# the same compiler flags.  No LOCAL_MODULE_HOST_OS is set for this module.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := cc
+LOCAL_SRC_FILES := createtesttrace.cc
+LOCAL_CFLAGS += -O0 -g -Wall
+LOCAL_MODULE := create_test_dmtrace
+include $(BUILD_HOST_EXECUTABLE)
diff --git a/tools/dmtracedump/createtesttrace.cc b/tools/dmtracedump/createtesttrace.cc
new file mode 100644
index 0000000..444cce4
--- /dev/null
+++ b/tools/dmtracedump/createtesttrace.cc
@@ -0,0 +1,449 @@
+/*
+ * Copyright 2015, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Create a test file in the format required by dmtrace.
+ */
+#include "profile.h"  // from VM header
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+/*
+ * Values from the header of the data file.
+ *
+ * writeDataHeader() emits magic, version and offsetToData as little-endian
+ * fields of 4, 2 and 2 bytes, followed by an 8-byte start time that it
+ * computes itself (the startWhen member is not what gets written).
+ */
+typedef struct DataHeader {
+  uint32_t magic;
+  int16_t version;
+  int16_t offsetToData;
+  int64_t startWhen;
+} DataHeader;
+
+#define VERSION 2  /* trace format version used unless -v overrides it */
+int32_t versionNumber = VERSION;  /* set by the -v option; parseOptions() accepts only 1 or 2 */
+int32_t verbose = 0;  /* set to 1 by the -d option; enables progress output while parsing */
+
+DataHeader header = {0x574f4c53, VERSION, sizeof(DataHeader), 0LL};  /* magic is written low byte first: 'S' 'L' 'O' 'W' */
+
+const char* versionHeader = "*version\n";  /* first line of the key section, see writeKeys() */
+const char* clockDef = "clock=thread-cpu\n";  /* written right after the version number */
+
+const char* keyThreads =  /* fixed thread table copied verbatim into the key section */
+    "*threads\n"
+    "1      main\n"
+    "2      foo\n"
+    "3      bar\n"
+    "4      blah\n";
+
+const char* keyEnd = "*end\n";  /* last line of the key section */
+
+typedef struct dataRecord {  /* one parsed input line; filled in by parseInputFile() */
+  uint32_t time;  /* time column, or a counter advanced by 2 per line when the column is absent */
+  int32_t threadId;  /* thread id column, or 1 when the column is absent */
+  uint32_t action; /* 0=entry, 1=exit, 2=exception exit */
+  char* fullName;  /* method text of the line with trailing whitespace removed */
+  char* className;  /* text before the first '.', or nullptr */
+  char* methodName;  /* text after the '.', up to the next space or end of line, or nullptr */
+  char* signature;  /* text following the method name, or nullptr */
+  uint32_t methodId;  /* assigned by writeKeyMethods(); records with equal fullName share one id */
+} dataRecord;
+
+dataRecord* records;  /* allocated by parseInputFile(); the end is marked by an all-zero sentinel record */
+
+#define BUF_SIZE 1024  /* capacity of the shared line buffer below */
+char buf[BUF_SIZE];  /* holds the input line currently being parsed */
+
+typedef struct stack {  /* per-thread call stack used by parseInputFile() to validate nesting */
+  dataRecord** frames;  /* record that entered each level; allocated on first use of the thread */
+  int32_t indentLevel;  /* current depth of this thread's stack */
+} stack;
+
+/*
+ * Local replacement for strndup(), which Mac OS does not provide.
+ *
+ * Returns a new[]-allocated buffer of len + 1 bytes that holds at most the
+ * first "len" characters of "src".  Everything after the copied characters,
+ * including the final byte, is set to zero.
+ */
+char* strndup(const char* src, size_t len) {
+  char* copy = new char[len + 1];
+  size_t pos = 0;
+  /* Copy until "len" characters were taken or "src" ends. */
+  while (pos < len && src[pos] != '\0') {
+    copy[pos] = src[pos];
+    pos += 1;
+  }
+  /* Zero the remainder, terminator included (same padding strncpy produces). */
+  while (pos <= len) {
+    copy[pos] = 0;
+    pos += 1;
+  }
+  return copy;
+}
+
+/*
+ * Parse the input file.  It looks something like this:
+ * # This is a comment line
+ * 4  1 A
+ * 6  1  B
+ * 8  1  B
+ * 10 1 A
+ *
+ * where the first column is the time, the second column is the thread id,
+ * and the third column is the method (actually just the class name).  The
+ * number of spaces between the 2nd and 3rd columns is the indentation and
+ * determines the call stack.  Each called method must be indented by one
+ * more space.  In the example above, A is called at time 4, A calls B at
+ * time 6, B returns at time 8, and A returns at time 10.  Thread 1 is the
+ * only thread that is running.
+ *
+ * An alternative file format leaves out the first two columns:
+ * A
+ *  B
+ *  B
+ * A
+ *
+ * In this file format, the thread id is always 1, and the time starts at
+ * 2 and increments by 2 for each line.
+ *
+ * The file is read twice: the first pass sizes the record array and the
+ * per-thread stacks, the second pass fills in the global "records" array,
+ * which is terminated by an all-zero sentinel record.  Lines starting with
+ * '#' and lines containing only whitespace are ignored.  If the file cannot
+ * be opened or a line is malformed, a message is printed and the process
+ * exits with status 1.
+ */
+void parseInputFile(const char* inputFileName) {
+  FILE* inputFp = fopen(inputFileName, "r");
+  if (inputFp == nullptr) {
+    perror(inputFileName);
+    exit(1);
+  }
+
+  /* First pass: count the lines and find the largest thread id and indent */
+  int32_t numRecords = 0;
+  int32_t maxThreadId = 1;
+  int32_t maxFrames = 0;
+  while (fgets(buf, BUF_SIZE, inputFp)) {
+    char* cp = buf;
+    if (*cp == '#') continue;
+    numRecords += 1;
+    if (isdigit(static_cast<unsigned char>(*cp))) {
+      /* Step over the time column first; the thread id is the second number. */
+      strtoul(cp, &cp, 0);
+      while (isspace(static_cast<unsigned char>(*cp))) cp += 1;
+      int32_t threadId = strtoul(cp, &cp, 0);
+      if (maxThreadId < threadId) maxThreadId = threadId;
+    }
+    /* Measured from the end of the thread id (or the start of the line), so
+     * this is never smaller than the indent computed in the second pass.
+     */
+    char* indentEnd = cp;
+    while (isspace(static_cast<unsigned char>(*indentEnd))) indentEnd += 1;
+    if (indentEnd - cp + 1 > maxFrames) maxFrames = indentEnd - cp + 1;
+  }
+  int32_t numThreads = maxThreadId + 1;
+
+  /* Add space for a sentinel record at the end */
+  numRecords += 1;
+  /* Zero-filled so that every methodId starts at 0, which writeKeyMethods()
+   * uses to recognize records that have no id yet.
+   */
+  records = new dataRecord[numRecords]();
+  stack* callStack = new stack[numThreads];
+  for (int32_t ii = 0; ii < numThreads; ++ii) {
+    callStack[ii].frames = nullptr;
+    callStack[ii].indentLevel = 0;
+  }
+
+  rewind(inputFp);
+
+  uint32_t time = 0;
+  int32_t linenum = 0;
+  int32_t nextRecord = 0;
+  int32_t indentLevel = 0;
+  while (fgets(buf, BUF_SIZE, inputFp)) {
+    uint32_t threadId;
+    int32_t len;
+    int32_t indent;
+    int32_t action;
+    char* save_cp;
+
+    linenum += 1;
+    char* cp = buf;
+
+    /* Skip lines that start with '#' */
+    if (*cp == '#') continue;
+
+    /* Skip lines that contain nothing but whitespace */
+    if (buf[strspn(buf, " \t\r\n\v\f")] == '\0') continue;
+
+    /* Get time and thread id */
+    if (!isdigit(static_cast<unsigned char>(*cp))) {
+      /* If the line does not begin with a digit, then fill in
+       * default values for the time and threadId.
+       */
+      time += 2;
+      threadId = 1;
+    } else {
+      time = strtoul(cp, &cp, 0);
+      while (isspace(static_cast<unsigned char>(*cp))) cp += 1;
+      threadId = strtoul(cp, &cp, 0);
+      /* Step over the separator after the thread id, but never past the
+       * end of the line.
+       */
+      if (*cp != '\0') cp += 1;
+    }
+
+    if (threadId >= static_cast<uint32_t>(numThreads)) {
+      fprintf(stderr, "Error: line %d: %s", linenum, buf);
+      fprintf(stderr, "  invalid thread id %u\n", threadId);
+      exit(1);
+    }
+
+    // Allocate space for the thread stack, if necessary
+    if (callStack[threadId].frames == nullptr) {
+      /* Zero-filled: the exit handling below tests frames for nullptr. */
+      callStack[threadId].frames = new dataRecord*[maxFrames]();
+    }
+    indentLevel = callStack[threadId].indentLevel;
+
+    save_cp = cp;
+    while (isspace(static_cast<unsigned char>(*cp))) {
+      cp += 1;
+    }
+    indent = cp - save_cp + 1;
+    if (*cp == '\0') {
+      fprintf(stderr, "Error: line %d: %s", linenum, buf);
+      fprintf(stderr, "  missing method name\n");
+      exit(1);
+    }
+    records[nextRecord].time = time;
+    records[nextRecord].threadId = threadId;
+    records[nextRecord].methodId = 0;
+
+    /* Find the end of the line; the last line may lack a newline */
+    save_cp = cp;
+    while (*cp != '\n' && *cp != '\0') cp += 1;
+
+    /* Remove trailing spaces */
+    while (cp > save_cp && isspace(static_cast<unsigned char>(cp[-1]))) cp -= 1;
+    len = cp - save_cp;
+    records[nextRecord].fullName = strndup(save_cp, len);
+
+    /* Parse the name to support "class.method signature" */
+    records[nextRecord].className = nullptr;
+    records[nextRecord].methodName = nullptr;
+    records[nextRecord].signature = nullptr;
+    cp = strchr(save_cp, '.');
+    if (cp) {
+      len = cp - save_cp;
+      if (len > 0) records[nextRecord].className = strndup(save_cp, len);
+      save_cp = cp + 1;
+      /* The method name ends at the first space, newline or end of text */
+      cp = save_cp + strcspn(save_cp, " \n");
+      if (cp > save_cp) {
+        len = cp - save_cp;
+        records[nextRecord].methodName = strndup(save_cp, len);
+        if (*cp != '\0') {
+          save_cp = cp + 1;
+          cp = save_cp + strcspn(save_cp, " \n");
+          if (cp > save_cp) {
+            len = cp - save_cp;
+            records[nextRecord].signature = strndup(save_cp, len);
+          }
+        }
+      }
+    }
+
+    if (verbose) {
+      printf("Indent: %d; IndentLevel: %d; Line: %s", indent, indentLevel, buf);
+    }
+
+    action = 0;
+    if (indent == indentLevel + 1) {  // Entering a method
+      if (verbose) printf("  Entering %s\n", records[nextRecord].fullName);
+      callStack[threadId].frames[indentLevel] = &records[nextRecord];
+    } else if (indent == indentLevel) {  // Exiting a method
+      // Validate the depth before indexing frames[indentLevel - 1].
+      if (indentLevel < 1) {
+        fprintf(stderr, "Error: line %d: %s", linenum, buf);
+        fprintf(stderr, "  expected positive (>0) indentation, found %d\n",
+                indent);
+        exit(1);
+      }
+      // Exiting method must be currently on top of stack (unless stack is
+      // empty)
+      if (callStack[threadId].frames[indentLevel - 1] == nullptr) {
+        if (verbose)
+          printf("  Exiting %s (past bottom of stack)\n",
+                 records[nextRecord].fullName);
+        callStack[threadId].frames[indentLevel - 1] = &records[nextRecord];
+        action = 1;
+      } else {
+        char* name = callStack[threadId].frames[indentLevel - 1]->fullName;
+        if (strcmp(name, records[nextRecord].fullName) == 0) {
+          if (verbose) printf("  Exiting %s\n", name);
+          action = 1;
+        } else {  // exiting method doesn't match stack's top method
+          fprintf(stderr, "Error: line %d: %s", linenum, buf);
+          fprintf(stderr, "  expected exit from %s\n",
+                  callStack[threadId].frames[indentLevel - 1]->fullName);
+          exit(1);
+        }
+      }
+    } else {
+      if (nextRecord != 0) {
+        fprintf(stderr, "Error: line %d: %s", linenum, buf);
+        fprintf(stderr, "  expected indentation %d [+1], found %d\n",
+                indentLevel, indent);
+        exit(1);
+      }
+
+      if (verbose) {
+        printf("  Nonzero indent at first record\n");
+        printf("  Entering %s\n", records[nextRecord].fullName);
+      }
+
+      // This is the first line of data, so we allow a larger
+      // initial indent.  This allows us to test popping off more
+      // frames than we entered.
+      indentLevel = indent - 1;
+      callStack[threadId].frames[indentLevel] = &records[nextRecord];
+    }
+
+    if (action == 0)
+      indentLevel += 1;
+    else
+      indentLevel -= 1;
+    records[nextRecord].action = action;
+    callStack[threadId].indentLevel = indentLevel;
+
+    nextRecord += 1;
+  }
+
+  /* Mark the last record with a sentinel */
+  memset(&records[nextRecord], 0, sizeof(dataRecord));
+
+  /* The per-thread stacks are only needed while parsing; release them and
+   * close the input file.
+   */
+  for (int32_t ii = 0; ii < numThreads; ++ii) {
+    delete[] callStack[ii].frames;
+  }
+  delete[] callStack;
+  fclose(inputFp);
+}
+
+/*
+ * Write values to the binary data file.
+ *
+ * write2LE emits the 16-bit "val" to "fp" as two bytes, low byte first.
+ */
+void write2LE(FILE* fp, uint16_t val) {
+  const int lowByte = val & 0xff;
+  const int highByte = val >> 8;
+  putc(lowByte, fp);
+  putc(highByte, fp);
+}
+
+/* Emits the 32-bit "val" to "fp" as four bytes, least significant first. */
+void write4LE(FILE* fp, uint32_t val) {
+  for (int32_t shift = 0; shift < 32; shift += 8) {
+    putc((val >> shift) & 0xff, fp);
+  }
+}
+
+/* Emits the 64-bit "val" to "fp" as eight bytes, least significant first. */
+void write8LE(FILE* fp, uint64_t val) {
+  for (int32_t shift = 0; shift < 64; shift += 8) {
+    putc((val >> shift) & 0xff, fp);
+  }
+}
+
+/*
+ * Writes one trace record: the thread id (a single byte when versionNumber
+ * is 1, two little-endian bytes otherwise) followed by the 4-byte method
+ * value and the 4-byte elapsed time.
+ */
+void writeDataRecord(FILE* dataFp, int32_t threadId, uint32_t methodVal, uint32_t elapsedTime) {
+  if (versionNumber != 1) {
+    write2LE(dataFp, threadId);
+  } else {
+    putc(threadId, dataFp);
+  }
+  write4LE(dataFp, methodVal);
+  write4LE(dataFp, elapsedTime);
+}
+
+/*
+ * Writes the binary data header: magic (4 bytes), version (2 bytes), offset
+ * to the data (2 bytes) and the start time (8 bytes), all little-endian.
+ * The start time packs the current seconds into the upper 32 bits and the
+ * microseconds into the lower bits.  Also stores versionNumber in the
+ * global header.
+ */
+void writeDataHeader(FILE* dataFp) {
+  struct timeval tv;
+
+  /* The timezone argument is obsolete and its use is unspecified by POSIX,
+   * so pass a null pointer.
+   */
+  gettimeofday(&tv, nullptr);
+  uint64_t startTime = tv.tv_sec;
+  startTime = (startTime << 32) | tv.tv_usec;
+  header.version = versionNumber;
+  write4LE(dataFp, header.magic);
+  write2LE(dataFp, header.version);
+  write2LE(dataFp, header.offsetToData);
+  write8LE(dataFp, startTime);
+}
+
+/*
+ * Writes the "*methods" section of the key file.  Every distinct fullName in
+ * "records" gets a fresh id (4, 8, 12, ...), the id is copied to all later
+ * records with the same fullName, and one line is printed per distinct name.
+ */
+void writeKeyMethods(FILE* keyFp) {
+  const char* sectionTag = "*methods\n";
+  fwrite(sectionTag, strlen(sectionTag), 1, keyFp);
+
+  /* Ids are multiples of 4, which keeps the two low bits clear. */
+  uint32_t idCounter = 0;
+  dataRecord* cur = records;
+  while (cur->fullName) {
+    if (cur->methodId == 0) {
+      idCounter += 1;
+      const uint32_t newId = idCounter << 2;
+      cur->methodId = newId;
+
+      /* Give the same id to every later record that has the same name and
+       * no id yet.
+       */
+      for (dataRecord* later = cur + 1; later->fullName; ++later) {
+        if (later->methodId == 0 && strcmp(cur->fullName, later->fullName) == 0) {
+          later->methodId = newId;
+        }
+      }
+
+      const bool hasClassAndMethod = cur->className != nullptr && cur->methodName != nullptr;
+      if (!hasClassAndMethod) {
+        fprintf(keyFp, "%#x        %s      m       ()\n", cur->methodId,
+                cur->fullName);
+      } else if (cur->signature != nullptr) {
+        fprintf(keyFp, "%#x        %s      %s      %s\n", cur->methodId,
+                cur->className, cur->methodName, cur->signature);
+      } else {
+        fprintf(keyFp, "%#x        %s      %s      ()\n", cur->methodId,
+                cur->className, cur->methodName);
+      }
+    }
+    ++cur;
+  }
+}
+
+/*
+ * Writes the whole key section: version header and number, clock
+ * definition, the fixed thread table, the method table and the end marker.
+ */
+void writeKeys(FILE* keyFp) {
+  fprintf(keyFp, "%s%d\n%s", versionHeader, versionNumber, clockDef);
+
+  const size_t threadsLen = strlen(keyThreads);
+  fwrite(keyThreads, threadsLen, 1, keyFp);
+
+  writeKeyMethods(keyFp);
+
+  const size_t endLen = strlen(keyEnd);
+  fwrite(keyEnd, endLen, 1, keyFp);
+}
+
+/*
+ * Writes one data record per entry of "records", stopping at the sentinel
+ * (the first entry whose fullName is null).  The method id and the action
+ * are merged into a single value with METHOD_COMBINE.
+ */
+void writeDataRecords(FILE* dataFp) {
+  dataRecord* cur = records;
+  while (cur->fullName) {
+    const uint32_t combined = METHOD_COMBINE(cur->methodId, cur->action);
+    writeDataRecord(dataFp, cur->threadId, combined, cur->time);
+    ++cur;
+  }
+}
+
+/*
+ * Creates "traceFileName" and writes the key section, the data header and
+ * the data records into it.  Prints a message and exits with status 1 if
+ * the file cannot be opened or closed successfully.
+ */
+void writeTrace(const char* traceFileName) {
+  /* The file carries binary records after the key text, so open it in
+   * binary mode to rule out newline translation on hosts that do it.
+   */
+  FILE* fp = fopen(traceFileName, "wb");
+  if (fp == nullptr) {
+    perror(traceFileName);
+    exit(1);
+  }
+  writeKeys(fp);
+  writeDataHeader(fp);
+  writeDataRecords(fp);
+  /* Buffered output is flushed here, so this is where write errors show up. */
+  if (fclose(fp) != 0) {
+    perror(traceFileName);
+    exit(1);
+  }
+}
+
+/*
+ * Handles the command line options: "-v <version>" selects the trace format
+ * version (only 1 and 2 are accepted) and "-d" turns on verbose output.
+ * Returns 0 on success and 1 if any option was invalid.
+ */
+int32_t parseOptions(int32_t argc, char** argv) {
+  int32_t err = 0;
+  int32_t opt;
+  while ((opt = getopt(argc, argv, "v:d")) != -1) {
+    if (opt == 'v') {
+      versionNumber = strtoul(optarg, nullptr, 0);
+      const bool supported = (versionNumber == 1 || versionNumber == 2);
+      if (!supported) {
+        fprintf(stderr, "Error: version number (%d) must be 1 or 2\n", versionNumber);
+        err = 1;
+      }
+    } else if (opt == 'd') {
+      verbose = 1;
+    } else {
+      err = 1;
+    }
+  }
+  return err;
+}
+
+/*
+ * Entry point.  Expects exactly two arguments after the options: the input
+ * file to parse and the name of the trace file to create.
+ */
+int32_t main(int32_t argc, char** argv) {
+  const bool badOptions = parseOptions(argc, argv) != 0;
+  if (badOptions || argc - optind != 2) {
+    fprintf(stderr, "Usage: %s [-v version] [-d] input_file trace_prefix\n", argv[0]);
+    exit(1);
+  }
+
+  const char* inputPath = argv[optind++];
+  parseInputFile(inputPath);
+
+  const char* tracePath = argv[optind++];
+  writeTrace(tracePath);
+
+  return 0;
+}
diff --git a/tools/dmtracedump/dmtracedump.pl b/tools/dmtracedump/dmtracedump.pl
new file mode 100755
index 0000000..6e487c6
--- /dev/null
+++ b/tools/dmtracedump/dmtracedump.pl
@@ -0,0 +1,18 @@
+#!/usr/bin/perl
+
+# Runs "dmtracedump -h -p <prefix>" for every file in the current directory
+# whose name ends in ".dmtrace.data", where <prefix> is the file name without
+# the ".data" suffix, and stores the output in "<prefix>.html".
+
+opendir(DIR, ".") || die "can't opendir .: $!";
+# Anchor the pattern so that only names ending in ".dmtrace.data" are picked
+# up; the ".data" suffix is stripped below to form the prefix.
+@traces = grep { /\.dmtrace\.data$/ } readdir(DIR);
+
+foreach (@traces)
+{
+    $input = $_;
+    $input =~ s/\.data$//;
+
+    $output = "$input.html";
+
+    print("dmtracedump -h -p $input > $output\n");
+    system("dmtracedump -h -p '$input' > '$output'");
+
+}
+
+closedir DIR;
diff --git a/tools/dmtracedump/dumpdir.sh b/tools/dmtracedump/dumpdir.sh
new file mode 100644
index 0000000..81992a2
--- /dev/null
+++ b/tools/dmtracedump/dumpdir.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+FILES=`ls $1/*.data | sed "s/^\\(.*\\).data$/\\1/"`
+
+mkdir -p $2
+
+for F in $FILES
+do
+    G=$2/`echo $F | sed "s/.*\\///g"`.html
+    dmtracedump -h -p $F > $G
+done
diff --git a/tools/dmtracedump/profile.h b/tools/dmtracedump/profile.h
new file mode 100644
index 0000000..8182352
--- /dev/null
+++ b/tools/dmtracedump/profile.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Android's method call profiling goodies.
+ */
+#ifndef ART_TOOLS_DMTRACEDUMP_PROFILE_H_
+#define ART_TOOLS_DMTRACEDUMP_PROFILE_H_
+
+/*
+ * Enumeration for the two "action" bits.
+ *
+ * All values fit within METHOD_ACTION_MASK (0x03), so METHOD_COMBINE can
+ * store one of them in the low two bits of a method value.
+ */
+enum {
+  METHOD_TRACE_ENTER = 0x00,   // method entry
+  METHOD_TRACE_EXIT = 0x01,    // method exit
+  METHOD_TRACE_UNROLL = 0x02,  // method exited by exception unrolling
+  // 0x03 currently unused
+};
+
+#define TOKEN_CHAR '*'  /* NOTE(review): presumably the character that starts key-file section lines such as "*version" -- confirm */
+
+/*
+ * Common definitions, shared with the dump tool.
+ *
+ * METHOD_ID clears the action bits of a combined value, METHOD_ACTION
+ * extracts them, and METHOD_COMBINE ORs an action into a method id.
+ */
+#define METHOD_ACTION_MASK 0x03 /* two bits */
+#define METHOD_ID(_method) ((_method) & (~METHOD_ACTION_MASK))
+#define METHOD_ACTION(_method) (((unsigned int)(_method)) & METHOD_ACTION_MASK)
+#define METHOD_COMBINE(_method, _action) ((_method) | (_action))
+
+#endif  // ART_TOOLS_DMTRACEDUMP_PROFILE_H_
diff --git a/tools/dmtracedump/tracedump.cc b/tools/dmtracedump/tracedump.cc
new file mode 100644
index 0000000..f70e2c2
--- /dev/null
+++ b/tools/dmtracedump/tracedump.cc
@@ -0,0 +1,2616 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Process dmtrace output.
+ *
+ * This is the wrong way to go about it -- C is a clumsy language for
+ * shuffling data around.  It'll do for a first pass.
+ */
+#include "profile.h"  // from VM header
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+/* Version number in the key file.
+ * Version 1 uses one byte for the thread id.
+ * Version 2 uses two bytes for the thread ids.
+ * Version 3 encodes the record size and adds an optional extra timestamp field.
+ */
+int32_t versionNumber;
+
+/* arbitrarily limit indentation; this is the capacity of CallStack::calls */
+#define MAX_STACK_DEPTH 10000
+
+/* thread list in key file is not reliable, so just max out; this sizes the
+ * per-thread arrays TraceData::stacks and TraceData::depth */
+#define MAX_THREADS 32768
+
+/* Size of temporary buffers for escaping html strings */
+#define HTML_BUFSIZE 10240
+
+/*
+ * Opening part of the HTML report: the <head> with the toggle/hover script
+ * helpers and the style sheet, up to and including the <body> tag.  This is
+ * a printf-style format string with a single %s, which is placed directly
+ * in front of "sortable.js" in the script URL.
+ */
+const char* htmlHeader =
+    "<html>\n<head>\n<script type=\"text/javascript\" "
+    "src=\"%ssortable.js\"></script>\n"
+    "<script language=\"javascript\">\n"
+    "function toggle(item) {\n"
+    "    obj=document.getElementById(item);\n"
+    "    visible=(obj.style.display!=\"none\" && obj.style.display!=\"\");\n"
+    "    key=document.getElementById(\"x\" + item);\n"
+    "    if (visible) {\n"
+    "        obj.style.display=\"none\";\n"
+    "        key.innerHTML=\"+\";\n"
+    "    } else {\n"
+    "        obj.style.display=\"block\";\n"
+    "        key.innerHTML=\"-\";\n"
+    "    }\n"
+    "}\n"
+    "function onMouseOver(obj) {\n"
+    "    obj.style.background=\"lightblue\";\n"
+    "}\n"
+    "function onMouseOut(obj) {\n"
+    "    obj.style.background=\"white\";\n"
+    "}\n"
+    "</script>\n"
+    "<style type=\"text/css\">\n"
+    "div { font-family: courier; font-size: 13 }\n"
+    "div.parent { margin-left: 15; display: none }\n"
+    "div.leaf { margin-left: 10 }\n"
+    "div.header { margin-left: 10 }\n"
+    "div.link { margin-left: 10; cursor: move }\n"
+    "span.parent { padding-right: 10; }\n"
+    "span.leaf { padding-right: 10; }\n"
+    "a img { border: 0;}\n"
+    "table.sortable th { border-width: 0px 1px 1px 1px; background-color: "
+    "#ccc;}\n"
+    "a { text-decoration: none; }\n"
+    "a:hover { text-decoration: underline; }\n"
+    "table.sortable th, table.sortable td { text-align: left;}"
+    "table.sortable tr.odd td { background-color: #ddd; }\n"
+    "table.sortable tr.even td { background-color: #fff; }\n"
+    "</style>\n"
+    "</head><body>\n\n";
+
+const char* htmlFooter = "\n</body>\n</html>\n";  /* closes the tags opened by htmlHeader */
+const char* profileSeparator =  /* a row of 70 '=' characters */
+    "======================================================================";
+
+const char* tableHeader =  /* printf-style format: %s becomes the table id, %% prints a literal '%' */
+    "<table class='sortable' id='%s'><tr>\n"
+    "<th>Method</th>\n"
+    "<th>Run 1 (us)</th>\n"
+    "<th>Run 2 (us)</th>\n"
+    "<th>Diff (us)</th>\n"
+    "<th>Diff (%%)</th>\n"
+    "<th>1: # calls</th>\n"
+    "<th>2: # calls</th>\n"
+    "</tr>\n";
+
+const char* tableHeaderMissing =  /* printf-style format: %s becomes the table id; NOTE(review): unlike tableHeader, the cells are not wrapped in <tr> */
+    "<table class='sortable' id='%s'>\n"
+    "<th>Method</th>\n"
+    "<th>Exclusive</th>\n"
+    "<th>Inclusive</th>\n"
+    "<th># calls</th>\n";
+
+#define GRAPH_LABEL_VISITED 0x0001  /* distinct bit flags; presumably stored in MethodEntry::graphState -- confirm */
+#define GRAPH_NODE_VISITED 0x0002
+
+/*
+ * Values from the header of the data file.
+ */
+typedef struct DataHeader {
+  uint32_t magic;
+  int16_t version;
+  int16_t offsetToData;
+  int64_t startWhen;
+  int16_t recordSize;  /* NOTE(review): presumably only present in version 3 traces, which encode the record size -- confirm against the header parser */
+} DataHeader;
+
+/*
+ * Entry from the thread list.
+ */
+typedef struct ThreadEntry {
+  int32_t threadId;
+  const char* threadName;  /* not released individually by freeDataKeys(); presumably points into DataKeys::fileData -- confirm */
+} ThreadEntry;
+
+struct MethodEntry;  /* forward declaration; the full definition follows further down */
+typedef struct TimedMethod {  /* node type used for MethodEntry::parents and MethodEntry::children */
+  struct TimedMethod* next;  /* presumably the next node of a singly linked list -- confirm */
+  uint64_t elapsedInclusive;  /* primary sort key of compareTimedMethod() */
+  int32_t numCalls;
+  struct MethodEntry* method;  /* the method this node refers to */
+} TimedMethod;
+
+typedef struct ClassEntry {  /* per-class totals; ordered by compareClassExclusive() */
+  const char* className;
+  uint64_t elapsedExclusive;  /* primary sort key of compareClassExclusive() */
+  int32_t numMethods;  /* presumably the number of entries in "methods" -- confirm */
+  struct MethodEntry** methods; /* list of methods in this class */
+  int32_t numCalls[2];              /* 0=normal, 1=recursive */
+} ClassEntry;
+
+typedef struct UniqueMethodEntry {  /* totals for methods sharing a name; ordered by compareUniqueExclusive() */
+  uint64_t elapsedExclusive;  /* primary sort key of compareUniqueExclusive() */
+  int32_t numMethods;  /* presumably the number of entries in "methods" -- confirm */
+  struct MethodEntry** methods; /* list of methods with same name */
+  int32_t numCalls[2];              /* 0=normal, 1=recursive */
+} UniqueMethodEntry;
+
+/*
+ * Entry from the method list.
+ *
+ * Entries are set up by initMethodEntry().  The comparison functions in this
+ * file order entries by elapsed time, class name, method name, signature
+ * and method id.
+ */
+typedef struct MethodEntry {
+  int64_t methodId;
+  const char* className;
+  const char* methodName;  /* may be nullptr; the comparison functions then fall back to methodId */
+  const char* signature;
+  const char* fileName;
+  int32_t lineNum;  /* -1 when no line number string was supplied to initMethodEntry() */
+  uint64_t elapsedExclusive;
+  uint64_t elapsedInclusive;
+  uint64_t topExclusive; /* non-recursive exclusive time */
+  uint64_t recursiveInclusive;
+  struct TimedMethod* parents[2];  /* 0=normal, 1=recursive */
+  struct TimedMethod* children[2]; /* 0=normal, 1=recursive */
+  int32_t numCalls[2];             /* 0=normal, 1=recursive */
+  int32_t index;                   /* used after sorting to number methods */
+  int32_t recursiveEntries;        /* number of entries on the stack */
+  int32_t graphState; /* used when graphing to see if this method has been visited before */
+} MethodEntry;
+
+/*
+ * The parsed contents of the key file.
+ *
+ * freeDataKeys() releases fileData, threads, methods and the struct itself
+ * with free().
+ */
+typedef struct DataKeys {
+  char* fileData; /* contents of the entire file */
+  int64_t fileLen;
+  int32_t numThreads;
+  ThreadEntry* threads;
+  int32_t numMethods;
+  MethodEntry* methods; /* 2 extra methods: "toplevel" and "unknown" */
+} DataKeys;
+
+#define TOPLEVEL_INDEX 0  /* presumably the index of the "toplevel" entry in DataKeys::methods -- confirm */
+#define UNKNOWN_INDEX 1  /* presumably the index of the "unknown" entry in DataKeys::methods -- confirm */
+
+typedef struct StackEntry {  /* element type of CallStack::calls */
+  MethodEntry* method;
+  uint64_t entryTime;
+} StackEntry;
+
+typedef struct CallStack {  /* TraceData keeps one pointer to a CallStack per thread slot */
+  int32_t top;
+  StackEntry calls[MAX_STACK_DEPTH];  /* fixed capacity, so each CallStack instance is large */
+  uint64_t lastEventTime;
+  uint64_t threadStartTime;
+} CallStack;
+
+typedef struct DiffEntry {  /* pairs two MethodEntry records with their time differences */
+  MethodEntry* method1;
+  MethodEntry* method2;
+  int64_t differenceExclusive;  /* signed, unlike the unsigned elapsed times in MethodEntry */
+  int64_t differenceInclusive;
+  double differenceExclusivePercentage;
+  double differenceInclusivePercentage;
+} DiffEntry;
+
+// Global options (a single instance, gOptions, is defined in this file)
+typedef struct Options {
+  const char* traceFileName;
+  const char* diffFileName;
+  const char* graphFileName;
+  int32_t keepDotFile;
+  int32_t dump;
+  int32_t outputHtml;
+  const char* sortableUrl;  // presumably the URL prefix substituted into htmlHeader -- confirm
+  int32_t threshold;
+} Options;
+
+typedef struct TraceData {  /* aggregated trace state with per-thread arrays of MAX_THREADS slots */
+  int32_t numClasses;
+  ClassEntry* classes;
+  CallStack* stacks[MAX_THREADS];  /* one pointer per thread slot */
+  int32_t depth[MAX_THREADS];
+  int32_t numUniqueMethods;
+  UniqueMethodEntry* uniqueMethods;
+} TraceData;
+
+static Options gOptions;  /* file-local option values; zero-initialized as a static object */
+
+/* Copies "src" into "dest", replacing the html special characters with
+ * their entities:
+ *  '<'  &lt;
+ *  '>'  &gt;
+ *  '&'  &amp;
+ * "len" is the capacity of "dest", including room for the terminating NUL.
+ * Returns "dest", or nullptr when "src" is nullptr.  If the escaped text
+ * does not fit, an error is printed and the process exits with status 1.
+ */
+char* htmlEscape(const char* src, char* dest, int32_t len) {
+  if (src == nullptr) return nullptr;
+
+  char* out = dest;
+  int32_t used = 0;
+  for (; *src != '\0'; ++src) {
+    /* Pick the text that stands for the current character. */
+    const char* piece;
+    int32_t pieceLen;
+    switch (*src) {
+      case '<':
+        piece = "&lt;";
+        pieceLen = 4;
+        break;
+      case '>':
+        piece = "&gt;";
+        pieceLen = 4;
+        break;
+      case '&':
+        piece = "&amp;";
+        pieceLen = 5;
+        break;
+      default:
+        piece = src;
+        pieceLen = 1;
+        break;
+    }
+
+    /* Stop before writing anything that would not leave room for the NUL. */
+    used += pieceLen;
+    if (used >= len) break;
+    memcpy(out, piece, pieceLen);
+    out += pieceLen;
+  }
+
+  if (used >= len) {
+    fprintf(stderr, "htmlEscape(): buffer overflow\n");
+    exit(1);
+  }
+  *out = 0;
+
+  return dest;
+}
+
+/* Initializes a MethodEntry
+ *
+ * Stores the given id and strings (the pointers are kept, not copied),
+ * converts "lineNumStr" with atoi() (-1 when it is nullptr) and resets all
+ * timing totals, parent/child lists, call counts and bookkeeping fields,
+ * including graphState, to zero/nullptr.
+ */
+void initMethodEntry(MethodEntry* method, int64_t methodId, const char* className,
+                     const char* methodName, const char* signature, const char* fileName,
+                     const char* lineNumStr) {
+  method->methodId = methodId;
+  method->className = className;
+  method->methodName = methodName;
+  method->signature = signature;
+  method->fileName = fileName;
+  method->lineNum = (lineNumStr != nullptr) ? atoi(lineNumStr) : -1;
+  method->elapsedExclusive = 0;
+  method->elapsedInclusive = 0;
+  method->topExclusive = 0;
+  method->recursiveInclusive = 0;
+  method->parents[0] = nullptr;
+  method->parents[1] = nullptr;
+  method->children[0] = nullptr;
+  method->children[1] = nullptr;
+  method->numCalls[0] = 0;
+  method->numCalls[1] = 0;
+  method->index = 0;
+  method->recursiveEntries = 0;
+  /* Start with no "visited" flags set so the field never holds garbage. */
+  method->graphState = 0;
+}
+
+/*
+ * qsort() comparator for an array of MethodEntry pointers.  Entries with a
+ * larger exclusive elapsed time come first; ties are resolved by class
+ * name, then by method name and signature, or by method id when either
+ * method name is missing.
+ */
+int32_t compareElapsedExclusive(const void* a, const void* b) {
+  const MethodEntry* lhs = *static_cast<const MethodEntry* const*>(a);
+  const MethodEntry* rhs = *static_cast<const MethodEntry* const*>(b);
+
+  /* Decreasing order of exclusive time. */
+  if (lhs->elapsedExclusive != rhs->elapsedExclusive) {
+    return (lhs->elapsedExclusive < rhs->elapsedExclusive) ? 1 : -1;
+  }
+
+  /* Equal times: fall back to alphabetical order. */
+  int32_t order = strcmp(lhs->className, rhs->className);
+  if (order != 0) return order;
+
+  if (lhs->methodName == nullptr || rhs->methodName == nullptr) {
+    if (lhs->methodId < rhs->methodId) return -1;
+    return (lhs->methodId > rhs->methodId) ? 1 : 0;
+  }
+
+  order = strcmp(lhs->methodName, rhs->methodName);
+  if (order != 0) return order;
+  return strcmp(lhs->signature, rhs->signature);
+}
+
+/*
+ * qsort() comparator for an array of MethodEntry pointers.  Entries with a
+ * larger inclusive elapsed time come first; ties are resolved by class
+ * name, then by method name and signature, or by method id when either
+ * method name is missing.
+ */
+int32_t compareElapsedInclusive(const void* a, const void* b) {
+  const MethodEntry* lhs = *static_cast<const MethodEntry* const*>(a);
+  const MethodEntry* rhs = *static_cast<const MethodEntry* const*>(b);
+
+  /* Decreasing order of inclusive time. */
+  if (lhs->elapsedInclusive != rhs->elapsedInclusive) {
+    return (lhs->elapsedInclusive < rhs->elapsedInclusive) ? 1 : -1;
+  }
+
+  /* Equal times: fall back to alphabetical order. */
+  int32_t order = strcmp(lhs->className, rhs->className);
+  if (order != 0) return order;
+
+  if (lhs->methodName == nullptr || rhs->methodName == nullptr) {
+    if (lhs->methodId < rhs->methodId) return -1;
+    return (lhs->methodId > rhs->methodId) ? 1 : 0;
+  }
+
+  order = strcmp(lhs->methodName, rhs->methodName);
+  if (order != 0) return order;
+  return strcmp(lhs->signature, rhs->signature);
+}
+
+/*
+ * qsort() comparator for an array of TimedMethod structs (the arguments
+ * point directly at the elements).  Elements with a larger inclusive
+ * elapsed time come first; ties are resolved through the referenced
+ * methods: class name, then method name and signature, or method id when
+ * either method name is missing.
+ */
+int32_t compareTimedMethod(const void* a, const void* b) {
+  const TimedMethod* lhs = static_cast<const TimedMethod*>(a);
+  const TimedMethod* rhs = static_cast<const TimedMethod*>(b);
+
+  /* Decreasing order of inclusive time. */
+  if (lhs->elapsedInclusive != rhs->elapsedInclusive) {
+    return (lhs->elapsedInclusive < rhs->elapsedInclusive) ? 1 : -1;
+  }
+
+  /* Equal times: compare the methods alphabetically. */
+  const MethodEntry* lhsMethod = lhs->method;
+  const MethodEntry* rhsMethod = rhs->method;
+  int32_t order = strcmp(lhsMethod->className, rhsMethod->className);
+  if (order != 0) return order;
+
+  if (lhsMethod->methodName == nullptr || rhsMethod->methodName == nullptr) {
+    if (lhsMethod->methodId < rhsMethod->methodId) return -1;
+    return (lhsMethod->methodId > rhsMethod->methodId) ? 1 : 0;
+  }
+
+  order = strcmp(lhsMethod->methodName, rhsMethod->methodName);
+  if (order != 0) return order;
+  return strcmp(lhsMethod->signature, rhsMethod->signature);
+}
+
+/*
+ * qsort() comparator for an array of MethodEntry pointers: alphabetical by
+ * class name, with equal class names ordered by increasing method id.
+ */
+int32_t compareClassNames(const void* a, const void* b) {
+  const MethodEntry* lhs = *static_cast<const MethodEntry* const*>(a);
+  const MethodEntry* rhs = *static_cast<const MethodEntry* const*>(b);
+
+  const int32_t byClass = strcmp(lhs->className, rhs->className);
+  if (byClass != 0) return byClass;
+
+  if (lhs->methodId < rhs->methodId) return -1;
+  return (lhs->methodId > rhs->methodId) ? 1 : 0;
+}
+
+/*
+ * qsort() comparator for an array of ClassEntry pointers.  Classes with a
+ * larger exclusive elapsed time come first; ties are resolved by class name
+ * and finally by the id of each class's first method.
+ */
+int32_t compareClassExclusive(const void* a, const void* b) {
+  const ClassEntry* lhs = *static_cast<const ClassEntry* const*>(a);
+  const ClassEntry* rhs = *static_cast<const ClassEntry* const*>(b);
+
+  /* Decreasing order of exclusive time. */
+  if (lhs->elapsedExclusive != rhs->elapsedExclusive) {
+    return (lhs->elapsedExclusive < rhs->elapsedExclusive) ? 1 : -1;
+  }
+
+  /* Equal times: alphabetical by class name. */
+  const int32_t byName = strcmp(lhs->className, rhs->className);
+  if (byName != 0) return byName;
+
+  /* Last resort: the first method id.  This is probably not needed. */
+  const int64_t lhsId = lhs->methods[0]->methodId;
+  const int64_t rhsId = rhs->methods[0]->methodId;
+  if (lhsId < rhsId) return -1;
+  return (lhsId > rhsId) ? 1 : 0;
+}
+
+/*
+ * qsort() comparator for an array of MethodEntry pointers: alphabetical by
+ * method name, then by class name, then by increasing method id.  When
+ * either entry has no method name, the result of compareClassNames() is
+ * used instead.
+ */
+int32_t compareMethodNames(const void* a, const void* b) {
+  const MethodEntry* lhs = *static_cast<const MethodEntry* const*>(a);
+  const MethodEntry* rhs = *static_cast<const MethodEntry* const*>(b);
+
+  const bool bothNamed = lhs->methodName != nullptr && rhs->methodName != nullptr;
+  if (!bothNamed) {
+    return compareClassNames(a, b);
+  }
+
+  const int32_t byMethod = strcmp(lhs->methodName, rhs->methodName);
+  if (byMethod != 0) return byMethod;
+
+  const int32_t byClass = strcmp(lhs->className, rhs->className);
+  if (byClass != 0) return byClass;
+
+  if (lhs->methodId < rhs->methodId) return -1;
+  return (lhs->methodId > rhs->methodId) ? 1 : 0;
+}
+
+/*
+ * qsort() comparator for an array of UniqueMethodEntry pointers.
+ *
+ * Entries with a larger exclusive elapsed time sort first.  Ties are
+ * broken using the first method of each entry: its class name, then its
+ * method id.
+ */
+int32_t compareUniqueExclusive(const void* a, const void* b) {
+  const UniqueMethodEntry* lhs = *static_cast<const UniqueMethodEntry* const*>(a);
+  const UniqueMethodEntry* rhs = *static_cast<const UniqueMethodEntry* const*>(b);
+
+  /* Decreasing order, hence the inverted result. */
+  const uint64_t lhsTime = lhs->elapsedExclusive;
+  const uint64_t rhsTime = rhs->elapsedExclusive;
+  if (lhsTime != rhsTime) return (lhsTime < rhsTime) ? 1 : -1;
+
+  /* Equal times: alphabetical by the class name of the first method. */
+  const int32_t byClass = strcmp(lhs->methods[0]->className, rhs->methods[0]->className);
+  if (byClass != 0) return byClass;
+
+  const int64_t lhsId = lhs->methods[0]->methodId;
+  const int64_t rhsId = rhs->methods[0]->methodId;
+  if (lhsId == rhsId) return 0;
+  return (lhsId < rhsId) ? -1 : 1;
+}
+
+/*
+ * Release a DataKeys struct together with the file, thread and method
+ * buffers it points to.  Every buffer, and the struct itself, is handed
+ * to free().  A null pKeys is ignored.
+ */
+void freeDataKeys(DataKeys* pKeys) {
+  if (pKeys != nullptr) {
+    free(pKeys->fileData);
+    free(pKeys->threads);
+    free(pKeys->methods);
+    free(pKeys);
+  }
+}
+
+/*
+ * Locate the next occurrence of "lookFor" in a bounded buffer.
+ *
+ * "data" points at the first byte to examine and "len" is the number of
+ * bytes that may be examined.
+ *
+ * Returns the offset of the match relative to "data", or -1 when the
+ * character does not occur within the first "len" bytes (which includes
+ * the case where "len" is zero or negative).
+ */
+int32_t findNextChar(const char* data, int32_t len, char lookFor) {
+  for (int32_t pos = 0; pos < len; ++pos) {
+    if (data[pos] == lookFor) return pos;
+  }
+  return -1;
+}
+
+/*
+ * Count the number of lines until the next token.
+ *
+ * "data" must point at the start of a line and "len" is the number of
+ * bytes available from there.  A line is counted for every '\n' found
+ * before a line that begins with TOKEN_CHAR.
+ *
+ * Returns the line count (possibly 0), or -1 if the buffer runs out
+ * before a token line is found.
+ */
+int32_t countLinesToToken(const char* data, int32_t len) {
+  int32_t count = 0;
+
+  for (;;) {
+    /* Only "len" bytes are readable; never inspect data[0] past that. */
+    if (len <= 0) return -1;
+    if (*data == TOKEN_CHAR) return count;
+
+    const int32_t next = findNextChar(data, len, '\n');
+    if (next < 0) return -1;
+    count++;
+    data += next + 1;
+    len -= next + 1;
+  }
+}
+
+/*
+ * Make sure we're at the start of the right section.
+ *
+ * "data" points at the expected token line and "len" is the number of
+ * readable bytes from there.  The line must consist of TOKEN_CHAR
+ * followed by text starting with "cmpStr", and must end in '\n'.
+ *
+ * Returns the length of the token line (including the newline), or -1 if
+ * something is wrong.
+ */
+int32_t checkToken(const char* data, int32_t len, const char* cmpStr) {
+  const int32_t cmpLen = strlen(cmpStr);
+
+  if (len <= 0) {
+    fprintf(stderr, "ERROR: not at start of %s (found end of data)\n", cmpStr);
+    return -1;
+  }
+
+  if (*data != TOKEN_CHAR) {
+    /* The buffer is not NUL-terminated, so bound the excerpt by what is left. */
+    const int32_t shown = (len < 10) ? len : 10;
+    fprintf(stderr, "ERROR: not at start of %s (found '%.*s')\n", cmpStr, shown, data);
+    return -1;
+  }
+
+  const int32_t next = findNextChar(data, len, '\n');
+  if (next < cmpLen + 1) return -1;
+
+  if (strncmp(data + 1, cmpStr, cmpLen) != 0) {
+    const int32_t shown = (len - 1 < 7) ? (len - 1) : 7;
+    fprintf(stderr, "ERROR: '%s' not found (got '%.*s')\n", cmpStr, shown, data + 1);
+    return -1;
+  }
+
+  return next + 1;
+}
+
+/*
+ * Parse the "*version" section.
+ *
+ * "offset" is where the section starts inside pKeys->fileData.  The first
+ * line after the token line is parsed with strtoul() and stored in the
+ * versionNumber global; the remaining lines of the section are skipped.
+ *
+ * Returns the offset just past the section, or -1 on failure.
+ */
+int64_t parseVersion(DataKeys* pKeys, int64_t offset, int32_t verbose) {
+  if (offset < 0) return -1;
+
+  char* cursor = pKeys->fileData + offset;
+  char* const limit = pKeys->fileData + pKeys->fileLen;
+
+  const int32_t tokenLen = checkToken(cursor, limit - cursor, "version");
+  if (tokenLen <= 0) return -1;
+  cursor += tokenLen;
+
+  /* How many lines does the section contain? */
+  const int32_t lineCount = countLinesToToken(cursor, limit - cursor);
+  if (lineCount <= 0) {
+    fprintf(stderr, "ERROR: failed while reading version (found %d)\n", lineCount);
+    return -1;
+  }
+
+  /* The first line is the version number; NUL-terminate it before parsing. */
+  int32_t lineLen = findNextChar(cursor, limit - cursor, '\n');
+  if (lineLen < 0) return -1;
+  cursor[lineLen] = '\0';
+  versionNumber = strtoul(cursor, nullptr, 0);
+  if (verbose) printf("VERSION: %d\n", versionNumber);
+  cursor += lineLen + 1;
+
+  /* Step over the remaining "name=value" lines without interpreting them. */
+  int32_t remaining = lineCount - 1;
+  while (remaining-- > 0) {
+    lineLen = findNextChar(cursor, limit - cursor, '\n');
+    if (lineLen < 0) return -1;
+    cursor += lineLen + 1;
+  }
+
+  return cursor - pKeys->fileData;
+}
+
+/*
+ * Parse the "*threads" section.
+ *
+ * Each line has the form "<thread id> \t <thread name>".  The lines are
+ * split in place (the tab and newline are overwritten with NULs), so the
+ * stored thread names point into pKeys->fileData.
+ *
+ * On success pKeys->threads and pKeys->numThreads are filled in and the
+ * offset just past the section is returned.  Returns -1 on failure; any
+ * array already stored in pKeys->threads is left for freeDataKeys().
+ */
+int64_t parseThreads(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "threads");
+  if (next < 0) return -1;
+
+  data += next;
+
+  /*
+   * Count the number of thread entries (one per line).
+   */
+  int32_t count = countLinesToToken(data, dataEnd - data);
+  if (count <= 0) {
+    fprintf(stderr, "ERROR: failed while reading threads (found %d)\n", count);
+    return -1;
+  }
+
+  /*
+   * freeDataKeys() releases this array with free(), so it has to come from
+   * the malloc family.  Unlike operator new[], calloc() reports failure by
+   * returning null.
+   */
+  pKeys->threads = static_cast<ThreadEntry*>(calloc(count, sizeof(ThreadEntry)));
+  if (pKeys->threads == nullptr) return -1;
+
+  /*
+   * Extract all entries.
+   */
+  for (int32_t i = 0; i < count; i++) {
+    next = findNextChar(data, dataEnd - data, '\n');
+    if (next < 0) {
+      fprintf(stderr, "ERROR: unexpected end of data in threads section\n");
+      return -1;
+    }
+    data[next] = '\0';
+
+    /* Without a tab there is no name field, and data[tab] would be data[-1]. */
+    int32_t tab = findNextChar(data, next, '\t');
+    if (tab < 0) {
+      fprintf(stderr, "ERROR: missing field on thread line: '%s'\n", data);
+      return -1;
+    }
+    data[tab] = '\0';
+
+    pKeys->threads[i].threadId = atoi(data);
+    pKeys->threads[i].threadName = data + tab + 1;
+
+    data += next + 1;
+  }
+
+  pKeys->numThreads = count;
+  return data - pKeys->fileData;
+}
+
+/*
+ * Parse the "*methods" section.
+ *
+ * Each line starts with a method id followed by one, three or five more
+ * tab-separated fields.  Lines are split in place (tabs and the newline
+ * are overwritten with NULs) and pointers into pKeys->fileData are handed
+ * to initMethodEntry().
+ *
+ * On success pKeys->methods and pKeys->numMethods are filled in and the
+ * offset just past the section is returned.  Returns -1 on failure; any
+ * array already stored in pKeys->methods is left for freeDataKeys().
+ */
+int64_t parseMethods(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* data = pKeys->fileData + offset;
+  char* dataEnd = pKeys->fileData + pKeys->fileLen;
+  int32_t next = checkToken(data, dataEnd - data, "methods");
+  if (next < 0) return -1;
+
+  data += next;
+
+  /*
+   * Count the number of method entries (one per line).
+   */
+  int32_t count = countLinesToToken(data, dataEnd - data);
+  if (count <= 0) {
+    fprintf(stderr, "ERROR: failed while reading methods (found %d)\n", count);
+    return -1;
+  }
+
+  /* Reserve an extra method at location 0 for the "toplevel" method,
+   * and another extra method for all other "unknown" methods.
+   */
+  count += 2;
+
+  /*
+   * freeDataKeys() releases this array with free(), so it has to come from
+   * the malloc family.  Unlike operator new[], calloc() reports failure by
+   * returning null.
+   */
+  pKeys->methods = static_cast<MethodEntry*>(calloc(count, sizeof(MethodEntry)));
+  if (pKeys->methods == nullptr) return -1;
+  initMethodEntry(&pKeys->methods[TOPLEVEL_INDEX], -2, "(toplevel)", nullptr, nullptr,
+                  nullptr, nullptr);
+  initMethodEntry(&pKeys->methods[UNKNOWN_INDEX], -1, "(unknown)", nullptr, nullptr,
+                  nullptr, nullptr);
+
+  /*
+   * Extract all entries, starting with index 2.
+   */
+  for (int32_t i = UNKNOWN_INDEX + 1; i < count; i++) {
+    next = findNextChar(data, dataEnd - data, '\n');
+    if (next < 0) {
+      fprintf(stderr, "ERROR: unexpected end of data in methods section\n");
+      return -1;
+    }
+    data[next] = '\0';
+
+    /* The id field is mandatory; everything after it is optional. */
+    int32_t tab1 = findNextChar(data, next, '\t');
+    if (tab1 < 0) {
+      fprintf(stderr, "ERROR: missing field on method line: '%s'\n", data);
+      return -1;
+    }
+
+    /*
+     * tab2..tab5 are offsets relative to the character following the
+     * previous tab; they are converted to offsets from "data" below.
+     */
+    int32_t tab2 = findNextChar(data + (tab1 + 1), next - (tab1 + 1), '\t');
+    int32_t tab3 = findNextChar(data + (tab1 + tab2 + 2), next - (tab1 + tab2 + 2), '\t');
+    int32_t tab4 = findNextChar(data + (tab1 + tab2 + tab3 + 3),
+                                next - (tab1 + tab2 + tab3 + 3), '\t');
+    int32_t tab5 = findNextChar(data + (tab1 + tab2 + tab3 + tab4 + 4),
+                                next - (tab1 + tab2 + tab3 + tab4 + 4), '\t');
+    assert(data[tab1] == '\t');
+    data[tab1] = '\0';
+
+    /* Reject an empty id as well as one with trailing garbage. */
+    char* endptr;
+    int64_t id = strtoul(data, &endptr, 0);
+    if (endptr == data || *endptr != '\0') {
+      fprintf(stderr, "ERROR: bad method ID '%s'\n", data);
+      return -1;
+    }
+
+    // Allow files that specify just a function name, instead of requiring
+    // "class \t method \t signature"
+    if (tab2 > 0 && tab3 > 0) {
+      tab2 += tab1 + 1;
+      tab3 += tab2 + 1;
+      assert(data[tab2] == '\t');
+      assert(data[tab3] == '\t');
+      data[tab2] = data[tab3] = '\0';
+
+      // This is starting to get awkward.  Allow filename and line #.
+      if (tab4 > 0 && tab5 > 0) {
+        tab4 += tab3 + 1;
+        tab5 += tab4 + 1;
+
+        assert(data[tab4] == '\t');
+        assert(data[tab5] == '\t');
+        data[tab4] = data[tab5] = '\0';
+
+        initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1,
+                        data + tab2 + 1, data + tab3 + 1, data + tab4 + 1,
+                        data + tab5 + 1);
+      } else {
+        initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1,
+                        data + tab2 + 1, data + tab3 + 1, nullptr, nullptr);
+      }
+    } else {
+      initMethodEntry(&pKeys->methods[i], id, data + tab1 + 1, nullptr, nullptr, nullptr,
+                      nullptr);
+    }
+
+    data += next + 1;
+  }
+
+  pKeys->numMethods = count;
+  return data - pKeys->fileData;
+}
+
+/*
+ * Parse the "*end" section.
+ *
+ * Verifies that an "end" token line starts at "offset" and returns the
+ * offset of the byte following that line, or -1 on failure.
+ */
+int64_t parseEnd(DataKeys* pKeys, int64_t offset) {
+  if (offset < 0) return -1;
+
+  char* const section = pKeys->fileData + offset;
+  char* const limit = pKeys->fileData + pKeys->fileLen;
+
+  const int32_t tokenLen = checkToken(section, limit - section, "end");
+  if (tokenLen < 0) return -1;
+
+  return (section + tokenLen) - pKeys->fileData;
+}
+
+/*
+ * Sort the thread list entries.
+ *
+ * qsort() comparator over ThreadEntry elements: ascending thread id.  The
+ * ids are compared rather than subtracted so that the result cannot
+ * overflow for ids of opposite sign.
+ */
+static int32_t compareThreads(const void* thread1, const void* thread2) {
+  const int64_t id1 = static_cast<const ThreadEntry*>(thread1)->threadId;
+  const int64_t id2 = static_cast<const ThreadEntry*>(thread2)->threadId;
+  if (id1 < id2) return -1;
+  if (id1 > id2) return 1;
+  return 0;
+}
+
+/* Sort pKeys->threads in place using compareThreads(). */
+void sortThreadList(DataKeys* pKeys) {
+  ThreadEntry* const entries = pKeys->threads;
+  qsort(entries, pKeys->numThreads, sizeof(*entries), compareThreads);
+}
+
+/*
+ * Sort the method list entries.
+ *
+ * qsort() comparator over MethodEntry elements: ascending method id.
+ */
+static int32_t compareMethods(const void* meth1, const void* meth2) {
+  const int64_t lhs = static_cast<const MethodEntry*>(meth1)->methodId;
+  const int64_t rhs = static_cast<const MethodEntry*>(meth2)->methodId;
+  if (lhs == rhs) return 0;
+  return (lhs < rhs) ? -1 : 1;
+}
+
+/* Sort pKeys->methods in place using compareMethods(). */
+void sortMethodList(DataKeys* pKeys) {
+  MethodEntry* const entries = pKeys->methods;
+  qsort(entries, pKeys->numMethods, sizeof(*entries), compareMethods);
+}
+
+/*
+ * Parse the key section, and return a copy of the parsed contents.
+ *
+ * The whole file behind "fp" is loaded into memory, the version, threads,
+ * methods and end sections are parsed in that order, and the thread and
+ * method lists are sorted.  When "verbose" is non-zero the version and
+ * the thread list are printed to stdout.
+ *
+ * On success "fp" is left positioned at the first byte after the key
+ * section and the caller releases the result with freeDataKeys().
+ * Returns nullptr on any failure.
+ */
+DataKeys* parseKeys(FILE* fp, int32_t verbose) {
+  int64_t offset;
+
+  /*
+   * freeDataKeys() releases the struct and its buffers with free(), so they
+   * are taken from the malloc family.  calloc() also provides the
+   * zero-filled struct and returns null on failure.
+   */
+  DataKeys* pKeys = static_cast<DataKeys*>(calloc(1, sizeof(DataKeys)));
+  if (pKeys == nullptr) return nullptr;
+
+  /*
+   * We load the entire file into memory.  We do this, rather than memory-
+   * mapping it, because we want to change some whitespace to NULs.
+   */
+  if (fseek(fp, 0L, SEEK_END) != 0) {
+    perror("fseek");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+  const int64_t fileLen = ftell(fp);
+  if (fileLen < 0) {
+    perror("ftell");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+  if (fileLen == 0) {
+    fprintf(stderr, "Key file is empty.\n");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+  pKeys->fileLen = fileLen;
+  rewind(fp);
+
+  pKeys->fileData = static_cast<char*>(malloc(pKeys->fileLen));
+  if (pKeys->fileData == nullptr) {
+    fprintf(stderr, "ERROR: unable to alloc %" PRIu64 " bytes\n", pKeys->fileLen);
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  if (fread(pKeys->fileData, 1, pKeys->fileLen, fp) != static_cast<size_t>(pKeys->fileLen)) {
+    fprintf(stderr, "ERROR: unable to read %" PRIu64 " bytes from trace file\n", pKeys->fileLen);
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  /* Each parser returns -1 when handed a negative offset, so errors propagate. */
+  offset = 0;
+  offset = parseVersion(pKeys, offset, verbose);
+  offset = parseThreads(pKeys, offset);
+  offset = parseMethods(pKeys, offset);
+  offset = parseEnd(pKeys, offset);
+  if (offset < 0) {
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  /*
+   * Record where the key section ends, but do not realloc() the buffer to
+   * that size: the thread names stored by parseThreads() and the strings
+   * handed to initMethodEntry() by parseMethods() point into fileData, and
+   * realloc() is allowed to move the block.
+   */
+  pKeys->fileLen = offset;
+  /* Leave fp pointing to the beginning of the data section. */
+  if (fseek(fp, offset, SEEK_SET) != 0) {
+    perror("fseek");
+    freeDataKeys(pKeys);
+    return nullptr;
+  }
+
+  sortThreadList(pKeys);
+  sortMethodList(pKeys);
+
+  /*
+   * Dump list of threads.
+   */
+  if (verbose) {
+    printf("Threads (%d):\n", pKeys->numThreads);
+    for (int32_t i = 0; i < pKeys->numThreads; i++) {
+      printf("%2d %s\n", pKeys->threads[i].threadId, pKeys->threads[i].threadName);
+    }
+  }
+
+#if 0
+  /*
+   * Dump list of methods.
+   */
+  if (verbose) {
+    printf("Methods (%d):\n", pKeys->numMethods);
+    for (int32_t i = 0; i < pKeys->numMethods; i++) {
+      printf("0x%08x %s : %s : %s\n",
+             pKeys->methods[i].methodId, pKeys->methods[i].className,
+             pKeys->methods[i].methodName, pKeys->methods[i].signature);
+    }
+  }
+#endif
+
+  return pKeys;
+}
+
+/*
+ * Read values from the binary data file.
+ */
+
+/*
+ * Read a 16-bit little-endian value.
+ *
+ * Make the return value "uint32_t" instead of "uint16_t" so that we can detect EOF:
+ * when the stream is already at end-of-file the result has all 32 bits set,
+ * which no 16-bit value can produce.
+ *
+ * Each getc() result is converted to unsigned before shifting, because
+ * shifting the negative EOF value as an int is not well defined.
+ */
+uint32_t read2LE(FILE* fp) {
+  uint32_t val = static_cast<uint32_t>(getc(fp));
+  val |= static_cast<uint32_t>(getc(fp)) << 8;
+  return val;
+}
+/*
+ * Read a 32-bit little-endian value.
+ *
+ * Each byte is converted to unsigned before shifting: shifting an int
+ * byte value of 0x80 or more left by 24 would overflow the signed type.
+ * End-of-file is not reported separately; when the stream is already at
+ * end-of-file the result has all bits set.
+ */
+uint32_t read4LE(FILE* fp) {
+  uint32_t val = static_cast<uint32_t>(getc(fp));
+  val |= static_cast<uint32_t>(getc(fp)) << 8;
+  val |= static_cast<uint32_t>(getc(fp)) << 16;
+  val |= static_cast<uint32_t>(getc(fp)) << 24;
+  return val;
+}
+/*
+ * Read a 64-bit little-endian value, least significant byte first.
+ * End-of-file is not reported separately.
+ */
+uint64_t read8LE(FILE* fp) {
+  uint64_t val = getc(fp);
+  for (int32_t shift = 8; shift <= 56; shift += 8) {
+    val |= static_cast<uint64_t>(getc(fp)) << shift;
+  }
+  return val;
+}
+
+/*
+ * Parse the header of the data section.
+ *
+ * Reads the magic number, version, offset-to-data and start time (16 bytes
+ * in total).  Versions 1 and 2 imply record sizes of 9 and 10 bytes;
+ * version 3 stores the record size in the next two header bytes.  Any
+ * header bytes left before offsetToData are skipped.
+ *
+ * Returns 0 with the file positioned at the start of the record data, or
+ * -1 if the header is truncated, has an unsupported version, or declares
+ * an offset smaller than the bytes already consumed.
+ */
+int32_t parseDataHeader(FILE* fp, DataHeader* pHeader) {
+  pHeader->magic = read4LE(fp);
+  pHeader->version = read2LE(fp);
+  pHeader->offsetToData = read2LE(fp);
+  pHeader->startWhen = read8LE(fp);
+  int32_t bytesToRead = pHeader->offsetToData - 16;
+  if (pHeader->version == 1) {
+    pHeader->recordSize = 9;
+  } else if (pHeader->version == 2) {
+    pHeader->recordSize = 10;
+  } else if (pHeader->version == 3) {
+    pHeader->recordSize = read2LE(fp);
+    bytesToRead -= 2;
+  } else {
+    fprintf(stderr, "Unsupported trace file version: %d\n", pHeader->version);
+    return -1;
+  }
+
+  /* The readers above do not report EOF themselves, so check the stream. */
+  if (feof(fp) || ferror(fp)) {
+    fprintf(stderr, "ERROR: truncated trace file header\n");
+    return -1;
+  }
+
+  /* A negative skip would seek backwards into the header just parsed. */
+  if (bytesToRead < 0) {
+    fprintf(stderr, "ERROR: bad offset to data in trace file header\n");
+    return -1;
+  }
+
+  if (fseek(fp, bytesToRead, SEEK_CUR) != 0) {
+    return -1;
+  }
+
+  return 0;
+}
+
+/*
+ * Look up a method by its method ID.
+ *
+ * Binary search over pKeys->methods, which therefore has to be ordered by
+ * ascending method id (the order produced by sortMethodList()).
+ *
+ * Returns nullptr if no matching method was found.
+ */
+MethodEntry* lookupMethod(DataKeys* pKeys, int64_t methodId) {
+  int32_t first = 0;
+  int32_t last = pKeys->numMethods - 1;
+
+  while (first <= last) {
+    const int32_t probe = (first + last) / 2;
+    MethodEntry* candidate = &pKeys->methods[probe];
+    const int64_t candidateId = candidate->methodId;
+
+    if (candidateId < methodId) {
+      first = probe + 1; /* target lies in the upper half */
+    } else if (candidateId > methodId) {
+      last = probe - 1; /* target lies in the lower half */
+    } else {
+      return candidate;
+    }
+  }
+
+  return nullptr;
+}
+
+/*
+ * Reads the next data record, and assigns the data values to threadId,
+ * methodVal and elapsedTime.  On end-of-file, the threadId, methodVal,
+ * and elapsedTime are unchanged.  Returns 1 on end-of-file, otherwise
+ * returns 0.
+ *
+ * The thread id occupies one byte in version 1 files and two bytes
+ * otherwise; it is followed by two 32-bit values.  Any further bytes up
+ * to dataHeader->recordSize are read and discarded.
+ */
+int32_t readDataRecord(FILE* dataFp, DataHeader* dataHeader, int32_t* threadId,
+                       uint32_t* methodVal, uint64_t* elapsedTime) {
+  int32_t id;
+  int32_t bytesToRead = dataHeader->recordSize;
+  if (dataHeader->version == 1) {
+    id = getc(dataFp);
+    bytesToRead -= 1;
+  } else {
+    id = read2LE(dataFp);
+    bytesToRead -= 2;
+  }
+  if (id == EOF) return 1;
+
+  /* Read into locals so the outputs stay untouched if the record is cut short. */
+  const uint32_t method = read4LE(dataFp);
+  const uint32_t elapsed = read4LE(dataFp);
+  bytesToRead -= 8;
+
+  while (bytesToRead-- > 0) {
+    getc(dataFp);
+  }
+
+  if (feof(dataFp)) {
+    fprintf(stderr, "WARNING: hit EOF mid-record\n");
+    return 1;
+  }
+
+  *threadId = id;
+  *methodVal = method;
+  *elapsedTime = elapsed;
+  return 0;
+}
+
+/*
+ * Read the key file and use it to produce formatted output from the
+ * data file.
+ *
+ * Opens gOptions.traceFileName, parses the key section (verbosely) and
+ * the data header, then prints one line per data record: thread id,
+ * action, elapsed time, a dotted depth indicator and the method.
+ * Records whose method id is not in the key section are printed using a
+ * placeholder entry.  Returns silently if the file cannot be opened or
+ * parsed.
+ */
+void dumpTrace() {
+  static const char* actionStr[] = {"ent", "xit", "unr", "???"};
+  MethodEntry bogusMethod = {
+      0, "???", "???",        "???",        "???",  -1, 0, 0,
+      0, 0,     {nullptr, nullptr}, {nullptr, nullptr}, {0, 0}, 0,  0, -1};
+  char bogusBuf[80];
+  TraceData traceData;
+
+  // printf("Dumping '%s' '%s'\n", dataFileName, keyFileName);
+
+  char spaces[MAX_STACK_DEPTH + 1];
+  memset(spaces, '.', MAX_STACK_DEPTH);
+  spaces[MAX_STACK_DEPTH] = '\0';
+
+  for (int32_t i = 0; i < MAX_THREADS; i++)
+    traceData.depth[i] = 2;  // adjust for return from start function
+
+  FILE* dataFp = fopen(gOptions.traceFileName, "rb");
+  if (dataFp == nullptr) return;
+
+  DataKeys* pKeys = parseKeys(dataFp, 1);
+  if (pKeys == nullptr) {
+    fclose(dataFp);
+    return;
+  }
+
+  DataHeader dataHeader;
+  if (parseDataHeader(dataFp, &dataHeader) < 0) {
+    fclose(dataFp);
+    freeDataKeys(pKeys);
+    return;
+  }
+
+  printf("Trace (threadID action usecs class.method signature):\n");
+
+  while (1) {
+    /*
+     * Extract values from file.
+     */
+    int32_t threadId;
+    uint32_t methodVal;
+    uint64_t elapsedTime;
+    if (readDataRecord(dataFp, &dataHeader, &threadId, &methodVal, &elapsedTime))
+      break;
+
+    /*
+     * The thread id comes straight from the file and is used to index
+     * traceData.depth, which is only initialised for MAX_THREADS entries.
+     */
+    if (threadId < 0 || threadId >= MAX_THREADS) {
+      fprintf(stderr, "WARNING: skipping record with out-of-range thread id %d\n", threadId);
+      continue;
+    }
+
+    int32_t action = METHOD_ACTION(methodVal);
+    int64_t methodId = METHOD_ID(methodVal);
+
+    /*
+     * Generate a line of output.
+     *
+     * NOTE(review): lastEnter is re-initialised to 0 on every iteration, so
+     * the mismatch test below can never fire as written.  Left unchanged
+     * because making it persistent would alter the output; confirm intent.
+     */
+    int64_t lastEnter = 0;
+    int32_t mismatch = 0;
+    if (action == METHOD_TRACE_ENTER) {
+      traceData.depth[threadId]++;
+      lastEnter = methodId;
+    } else {
+      /* quick test for mismatched adjacent enter/exit */
+      if (lastEnter != 0 && lastEnter != methodId) mismatch = 1;
+    }
+
+    /* Clamp the depth so the offset into "spaces" stays inside the array. */
+    int32_t printDepth = traceData.depth[threadId];
+    char depthNote = ' ';
+    if (printDepth < 0) {
+      printDepth = 0;
+      depthNote = '-';
+    } else if (printDepth > MAX_STACK_DEPTH) {
+      printDepth = MAX_STACK_DEPTH;
+      depthNote = '+';
+    }
+
+    MethodEntry* method = lookupMethod(pKeys, methodId);
+    if (method == nullptr) {
+      method = &bogusMethod;
+      snprintf(bogusBuf, sizeof(bogusBuf), "methodId: %#" PRIx64 "", methodId);
+      method->signature = bogusBuf;
+    }
+
+    if (method->methodName) {
+      printf("%2d %s%c %8" PRIu64 "%c%s%s.%s %s\n", threadId, actionStr[action],
+             mismatch ? '!' : ' ', elapsedTime, depthNote,
+             spaces + (MAX_STACK_DEPTH - printDepth), method->className,
+             method->methodName, method->signature);
+    } else {
+      printf("%2d %s%c %8" PRIu64 "%c%s%s\n", threadId, actionStr[action],
+             mismatch ? '!' : ' ', elapsedTime, depthNote,
+             spaces + (MAX_STACK_DEPTH - printDepth), method->className);
+    }
+
+    if (action != METHOD_TRACE_ENTER) {
+      traceData.depth[threadId]--; /* METHOD_TRACE_EXIT or METHOD_TRACE_UNROLL */
+      lastEnter = 0;
+    }
+
+    mismatch = 0;
+  }
+
+  fclose(dataFp);
+  freeDataKeys(pKeys);
+}
+
+/* Credit "elapsedTime" to a child method and to the parent/child link
+ * between the two methods.
+ *
+ * This is called when the child routine exits, after the child has
+ * been popped from the stack.  The elapsedTime parameter is the
+ * duration of the child routine, including time spent in called routines.
+ *
+ * The child's own totals are updated, then a TimedMethod for the child is
+ * updated or created in one of the parent's "children" lists, and a
+ * TimedMethod for the parent is updated or created in one of the child's
+ * "parents" lists.
+ */
+void addInclusiveTime(MethodEntry* parent, MethodEntry* child, uint64_t elapsedTime) {
+#if 0
+  bool verbose = false;
+  if (strcmp(child->className, debugClassName) == 0)
+    verbose = true;
+#endif
+
+  /* Index 1 selects the recursive list/counter, index 0 the regular one. */
+  const int32_t childSlot = (child->recursiveEntries > 0) ? 1 : 0;
+  const int32_t parentSlot = (parent->recursiveEntries > 1) ? 1 : 0;
+
+  switch (child->recursiveEntries) {
+    case 0:
+      child->elapsedInclusive += elapsedTime;
+      break;
+    case 1:
+      child->recursiveInclusive += elapsedTime;
+      break;
+    default:
+      /* Deeper recursion adds nothing to either total. */
+      break;
+  }
+  child->numCalls[childSlot] += 1;
+
+#if 0
+  if (verbose) {
+    fprintf(stderr,
+            "%s %d elapsedTime: %lld eI: %lld, rI: %lld\n",
+            child->className, child->recursiveEntries,
+            elapsedTime, child->elapsedInclusive,
+            child->recursiveInclusive);
+  }
+#endif
+
+  /* Locate the child in the parent's list, or prepend a new record for it. */
+  TimedMethod* pTimed = parent->children[parentSlot];
+  while (pTimed != nullptr && pTimed->method != child) {
+    pTimed = pTimed->next;
+  }
+  if (pTimed != nullptr) {
+    pTimed->elapsedInclusive += elapsedTime;
+    pTimed->numCalls += 1;
+  } else {
+    pTimed = new TimedMethod();
+    pTimed->elapsedInclusive = elapsedTime;
+    pTimed->numCalls = 1;
+    pTimed->method = child;
+    pTimed->next = parent->children[parentSlot];
+    parent->children[parentSlot] = pTimed;
+  }
+
+  /* Locate the parent in the child's list, or prepend a new record for it. */
+  pTimed = child->parents[childSlot];
+  while (pTimed != nullptr && pTimed->method != parent) {
+    pTimed = pTimed->next;
+  }
+  if (pTimed != nullptr) {
+    pTimed->elapsedInclusive += elapsedTime;
+    pTimed->numCalls += 1;
+  } else {
+    pTimed = new TimedMethod();
+    pTimed->elapsedInclusive = elapsedTime;
+    pTimed->numCalls = 1;
+    pTimed->method = parent;
+    pTimed->next = child->parents[childSlot];
+    child->parents[childSlot] = pTimed;
+  }
+
+#if 0
+  if (verbose) {
+    fprintf(stderr,
+            "  %s %d eI: %lld\n",
+            parent->className, parent->recursiveEntries,
+            pTimed->elapsedInclusive);
+  }
+#endif
+}
+
+/* Build a sorted copy of a TimedMethod linked list.
+ *
+ * The number of elements in "list" is stored in "*num".  For an empty
+ * list nullptr is returned.  Otherwise the elements are copied into an
+ * array allocated with new[], ordered with compareTimedMethod(), and
+ * their "next" pointers are rewired so that the array can also be walked
+ * as a list.  The input list is not modified.
+ */
+TimedMethod* sortTimedMethodList(TimedMethod* list, int32_t* num) {
+  int32_t total = 0;
+  for (const TimedMethod* node = list; node != nullptr; node = node->next) {
+    ++total;
+  }
+  *num = total;
+  if (total == 0) return nullptr;
+
+  /* Flatten the list into an array so that qsort() can be applied. */
+  TimedMethod* sorted = new TimedMethod[total];
+  TimedMethod* slot = sorted;
+  for (const TimedMethod* node = list; node != nullptr; node = node->next) {
+    memcpy(slot, node, sizeof(TimedMethod));
+    ++slot;
+  }
+  qsort(sorted, total, sizeof(TimedMethod), compareTimedMethod);
+
+  /* The copied "next" pointers still refer to the old list; chain the array. */
+  for (int32_t idx = 0; idx + 1 < total; ++idx) {
+    sorted[idx].next = &sorted[idx + 1];
+  }
+  sorted[total - 1].next = nullptr;
+
+  return sorted;
+}
+
+/* Define flag values for printInclusiveMethod().
+ *
+ * kIsRecursive is tested as a bit in that function's "flags" argument;
+ * when it is set the percentage column is printed blank.
+ */
+static const int32_t kIsRecursive = 1;
+
+/* This prints the inclusive stats for all the parents or children of a
+ * method, depending on the list that is passed in.
+ *
+ * "list" is sorted into a temporary array first; one line is printed per
+ * entry.  "numCalls" is the denominator shown in the calls column; when
+ * it is 0 the relative's own total call count is used instead.  If
+ * "flags" has kIsRecursive set, the percentage column is left blank.
+ */
+void printInclusiveMethod(MethodEntry* method, TimedMethod* list, int32_t numCalls, int32_t flags) {
+  char buf[80];
+  const char* anchor_close = "";
+  const char* spaces = "      "; /* 6 spaces */
+  int32_t num_spaces = strlen(spaces);
+  const char* space_ptr = &spaces[num_spaces];
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (gOptions.outputHtml) anchor_close = "</a>";
+
+  int32_t num;
+  TimedMethod* sorted = sortTimedMethodList(list, &num);
+  double methodTotal = method->elapsedInclusive;
+  for (TimedMethod* pTimed = sorted; pTimed; pTimed = pTimed->next) {
+    MethodEntry* relative = pTimed->method;
+    const char* className = relative->className;
+    const char* methodName = relative->methodName;
+    const char* signature = relative->signature;
+    double per = 100.0 * pTimed->elapsedInclusive / methodTotal;
+    snprintf(buf, sizeof(buf), "[%d]", relative->index);
+    if (gOptions.outputHtml) {
+      /*
+       * The anchor markup adds no visible width, so the padding is derived
+       * from the length of the plain "[index]" text before buf is rewritten.
+       */
+      int32_t len = strlen(buf);
+      if (len > num_spaces) len = num_spaces;
+      snprintf(buf, sizeof(buf), "<a href=\"#m%d\">[%d]", relative->index, relative->index);
+      space_ptr = &spaces[len];
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+    int32_t nCalls = numCalls;
+    if (nCalls == 0) nCalls = relative->numCalls[0] + relative->numCalls[1];
+    if (relative->methodName) {
+      if (flags & kIsRecursive) {
+        // Don't display percentages for recursive functions
+        printf("%6s %5s   %6s %s%6s%s %6d/%-6d %9" PRIu64 " %s.%s %s\n", "", "",
+               "", space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className, methodName, signature);
+      } else {
+        printf("%6s %5s   %5.1f%% %s%6s%s %6d/%-6d %9" PRIu64 " %s.%s %s\n", "",
+               "", per, space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className, methodName, signature);
+      }
+    } else {
+      if (flags & kIsRecursive) {
+        // Don't display percentages for recursive functions
+        printf("%6s %5s   %6s %s%6s%s %6d/%-6d %9" PRIu64 " %s\n", "", "", "",
+               space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className);
+      } else {
+        printf("%6s %5s   %5.1f%% %s%6s%s %6d/%-6d %9" PRIu64 " %s\n", "", "",
+               per, space_ptr, buf, anchor_close, pTimed->numCalls, nCalls,
+               pTimed->elapsedInclusive, className);
+      }
+    }
+  }
+
+  /* sortTimedMethodList() returns a new[] array (or nullptr) owned by us. */
+  delete[] sorted;
+}
+
+/* Store in method->recursiveEntries the number of frames among the first
+ * "top" entries of pStack->calls that refer to "method".
+ */
+void countRecursiveEntries(CallStack* pStack, int32_t top, MethodEntry* method) {
+  method->recursiveEntries = 0;
+  int32_t frame = 0;
+  while (frame < top) {
+    if (pStack->calls[frame].method == method) {
+      method->recursiveEntries += 1;
+    }
+    ++frame;
+  }
+}
+
+/* Print the first "top" frames of a call stack to stderr, one per line:
+ * frame index, entry time, and the method (class only when the entry has
+ * no method name).
+ */
+void stackDump(CallStack* pStack, int32_t top) {
+  for (int32_t frame = 0; frame < top; ++frame) {
+    const MethodEntry* callee = pStack->calls[frame].method;
+    const uint64_t enteredAt = pStack->calls[frame].entryTime;
+
+    if (callee->methodName == nullptr) {
+      fprintf(stderr, "  %2d: %8" PRIu64 " %s\n", frame, enteredAt, callee->className);
+      continue;
+    }
+    fprintf(stderr, "  %2d: %8" PRIu64 " %s.%s %s\n", frame, enteredAt,
+            callee->className, callee->methodName, callee->signature);
+  }
+}
+
+/* Write the HTML table of contents, linking to the four profile sections,
+ * to stdout.
+ */
+void outputTableOfContents() {
+  static const char* const kLines[] = {
+      "<a name=\"contents\"></a>\n",
+      "<h2>Table of Contents</h2>\n",
+      "<ul>\n",
+      "  <li><a href=\"#exclusive\">Exclusive profile</a></li>\n",
+      "  <li><a href=\"#inclusive\">Inclusive profile</a></li>\n",
+      "  <li><a href=\"#class\">Class/method profile</a></li>\n",
+      "  <li><a href=\"#method\">Method/class profile</a></li>\n",
+      "</ul>\n\n",
+  };
+  for (const char* line : kLines) {
+    fputs(line, stdout);
+  }
+}
+
+/* Write the HTML navigation links (top of page plus the four profile
+ * sections) to stdout.
+ */
+void outputNavigationBar() {
+  static const char* const kLines[] = {
+      "<a href=\"#contents\">[Top]</a>\n",
+      "<a href=\"#exclusive\">[Exclusive]</a>\n",
+      "<a href=\"#inclusive\">[Inclusive]</a>\n",
+      "<a href=\"#class\">[Class]</a>\n",
+      "<a href=\"#method\">[Method]</a>\n",
+      "<br><br>\n",
+  };
+  for (const char* line : kLines) {
+    fputs(line, stdout);
+  }
+}
+
+/* Print the exclusive-time profile to stdout.
+ *
+ * The methods are first sorted by inclusive time so that each can be
+ * given its index, then sorted by exclusive time for printing.  Every
+ * method with a non-zero exclusive time is listed with its share of
+ * "sumThreadTime" and the running total of those shares.  HTML markup is
+ * added when gOptions.outputHtml is set.  "pMethods" is reordered in
+ * place.
+ */
+void printExclusiveProfile(MethodEntry** pMethods, int32_t numMethods, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+  char anchor_buf[80];
+  anchor_buf[0] = 0;
+  const char* anchor_close = gOptions.outputHtml ? "</a>" : "";
+
+  if (!gOptions.outputHtml) {
+    printf("\n%s\n", profileSeparator);
+  } else {
+    printf("<a name=\"exclusive\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  }
+
+  /* The index of a method is its rank by decreasing inclusive time. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+  for (int32_t rank = 0; rank < numMethods; ++rank) {
+    pMethods[rank]->index = rank;
+  }
+
+  /* Print order is decreasing exclusive time. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+
+  printf("Total cycles: %" PRIu64 "\n\n", sumThreadTime);
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+  printf("Exclusive elapsed times for each method, not including time spent in\n");
+  printf("children, sorted by exclusive time.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n<pre>\n");
+  }
+
+  printf("    Usecs  self %%  sum %%  Method\n");
+
+  const double total = sumThreadTime;
+  double runningSum = 0;
+  for (int32_t row = 0; row < numMethods; ++row) {
+    MethodEntry* entry = pMethods[row];
+
+    /* Sorted by decreasing time: the first zero ends the useful rows. */
+    if (entry->elapsedExclusive == 0) break;
+
+    runningSum += entry->elapsedExclusive;
+    const double per = 100.0 * entry->elapsedExclusive / total;
+    const double sum_per = 100.0 * runningSum / total;
+
+    const char* className = entry->className;
+    const char* methodName = entry->methodName;
+    const char* signature = entry->signature;
+    if (gOptions.outputHtml) {
+      sprintf(anchor_buf, "<a href=\"#m%d\">", entry->index);
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+
+    if (entry->methodName == nullptr) {
+      printf("%9" PRIu64 "  %6.2f %6.2f  %s[%d]%s %s\n",
+             entry->elapsedExclusive, per, sum_per, anchor_buf, entry->index,
+             anchor_close, className);
+    } else {
+      printf("%9" PRIu64 "  %6.2f %6.2f  %s[%d]%s %s.%s %s\n",
+             entry->elapsedExclusive, per, sum_per, anchor_buf, entry->index,
+             anchor_close, className, methodName, signature);
+    }
+  }
+
+  if (gOptions.outputHtml) {
+    printf("</pre>\n");
+  }
+}
+
+/* Check that the child method meets the threshold of the parent.
+ *
+ * Returns 1 when the child's inclusive time, as a whole-number percentage
+ * of the parent's inclusive time, is at least gOptions.threshold, and 0
+ * otherwise.  A parent with no inclusive time gives no meaningful
+ * percentage (the division would produce inf or NaN, and converting that
+ * to an integer is undefined), so the child is reported as below the
+ * threshold in that case.
+ */
+int32_t checkThreshold(MethodEntry* parent, MethodEntry* child) {
+  double parentTime = parent->elapsedInclusive;
+  double childTime = child->elapsedInclusive;
+  if (parentTime <= 0) return 0;
+
+  int64_t percentage = (childTime / parentTime) * 100.0;
+  return (percentage < gOptions.threshold) ? 0 : 1;
+}
+
+/* Write a "dot" node label for "method" to "file", then recurse into each
+ * of its children[0] entries that has not been labelled yet and passes
+ * checkThreshold().
+ *
+ * The label shows the method index, its name, the inclusive and exclusive
+ * times divided by 1000, and numCalls[0].  Entries may have a null
+ * methodName (parseMethods() creates "(toplevel)" and "(unknown)" that
+ * way); for those only the class name is printed, because passing a null
+ * pointer to "%s" is undefined.
+ */
+void createLabels(FILE* file, MethodEntry* method) {
+  if (method->methodName != nullptr) {
+    fprintf(file,
+            "node%d[label = \"[%d] %s.%s (%" PRIu64 ", %" PRIu64 ", %d)\"]\n",
+            method->index, method->index, method->className, method->methodName,
+            method->elapsedInclusive / 1000, method->elapsedExclusive / 1000,
+            method->numCalls[0]);
+  } else {
+    fprintf(file,
+            "node%d[label = \"[%d] %s (%" PRIu64 ", %" PRIu64 ", %d)\"]\n",
+            method->index, method->index, method->className,
+            method->elapsedInclusive / 1000, method->elapsedExclusive / 1000,
+            method->numCalls[0]);
+  }
+
+  /* Mark before recursing so that cycles in the call graph terminate. */
+  method->graphState = GRAPH_LABEL_VISITED;
+
+  for (TimedMethod* child = method->children[0]; child; child = child->next) {
+    MethodEntry* childMethod = child->method;
+
+    if ((childMethod->graphState & GRAPH_LABEL_VISITED) == 0 &&
+        checkThreshold(method, childMethod)) {
+      createLabels(file, child->method);
+    }
+  }
+}
+
+/* Write the "dot" edges from "method" to each of its children[0] entries
+ * that passes checkThreshold(), recursing into children that have not
+ * been visited yet.
+ */
+void createLinks(FILE* file, MethodEntry* method) {
+  /* Mark before recursing so that cycles in the call graph terminate. */
+  method->graphState |= GRAPH_NODE_VISITED;
+
+  TimedMethod* edge = method->children[0];
+  while (edge != nullptr) {
+    MethodEntry* target = edge->method;
+    if (checkThreshold(method, target)) {
+      fprintf(file, "node%d -> node%d\n", method->index, target->index);
+      /* Descend only into nodes whose edges have not been written yet. */
+      if (!(target->graphState & GRAPH_NODE_VISITED)) {
+        createLinks(file, target);
+      }
+    }
+    edge = edge->next;
+  }
+}
+
+/* Render the call graph rooted at the first entry of dataKeys->methods as
+ * a PNG image.
+ *
+ * A "dot" description is written to "<graphFileName>.dot" when
+ * gOptions.keepDotFile is set, otherwise to a scratch file with a
+ * generated name in the current directory.  The external "dot" program is
+ * then run to produce gOptions.graphFileName, and the scratch file is
+ * removed afterwards.
+ */
+void createInclusiveProfileGraphNew(DataKeys* dataKeys) {
+  char path[FILENAME_MAX];
+  if (gOptions.keepDotFile) {
+    snprintf(path, FILENAME_MAX, "%s.dot", gOptions.graphFileName);
+  } else {
+    snprintf(path, FILENAME_MAX, "dot-%d-%d.dot", (int32_t)time(nullptr), rand());
+  }
+
+  FILE* file = fopen(path, "w+");
+  if (file == nullptr) {
+    /* Nothing can be written; report why instead of passing null to fprintf(). */
+    perror(path);
+    return;
+  }
+
+  fprintf(file, "digraph g {\nnode [shape = record,height=.1];\n");
+
+  createLabels(file, dataKeys->methods);
+  createLinks(file, dataKeys->methods);
+
+  fprintf(file, "}");
+  fclose(file);
+
+  // now that we have the dot file generate the image
+  char command[1024];
+  snprintf(command, 1024, "dot -Tpng -o \"%s\" \"%s\"", gOptions.graphFileName, path);
+
+  if (system(command) != 0) {
+    fprintf(stderr, "WARNING: command failed: %s\n", command);
+  }
+
+  if (!gOptions.keepDotFile) {
+    remove(path);
+  }
+}
+
+/*
+ * Prints the "inclusive" profile section to stdout.
+ *
+ * pMethods is sorted in place with compareElapsedInclusive.  Then, for each
+ * method up to (not including) the first one whose elapsedInclusive is zero,
+ * the function prints its parents, a summary line with the method's share of
+ * sumThreadTime, a line with its top-level exclusive time, and its children.
+ * In html mode (gOptions.outputHtml) the names are html-escaped and an
+ * "m<index>" anchor is emitted for every method.
+ */
+void printInclusiveProfile(MethodEntry** pMethods, int32_t numMethods, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+  if (gOptions.outputHtml) {
+    printf("<a name=\"inclusive\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  /* Sort the methods into decreasing order of inclusive elapsed time. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+
+  printf("\nInclusive elapsed times for each method and its parents and children,\n");
+  printf("sorted by inclusive time.\n\n");
+
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n<pre>\n");
+  }
+
+  printf("index  %%/total %%/self  index     calls         usecs name\n");
+
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    char buf[40];
+
+    MethodEntry* method = pMethods[ii];
+    /* Don't show methods with zero cycles; the list is sorted, so the first
+     * zero entry ends the output.
+     */
+    if (method->elapsedInclusive == 0) break;
+
+    const char* className = method->className;
+    const char* methodName = method->methodName;
+    const char* signature = method->signature;
+
+    if (gOptions.outputHtml) {
+      printf("<a name=\"m%d\"></a>", method->index);
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+    }
+    printf("----------------------------------------------------\n");
+
+    /* Sort and print the parents */
+    int32_t numCalls = method->numCalls[0] + method->numCalls[1];
+    printInclusiveMethod(method, method->parents[0], numCalls, 0);
+    if (method->parents[1]) {
+      printf("               +++++++++++++++++++++++++\n");
+      printInclusiveMethod(method, method->parents[1], numCalls, kIsRecursive);
+    }
+
+    double per = 100.0 * method->elapsedInclusive / total;
+    snprintf(buf, sizeof(buf), "[%d]", ii);
+    if (method->methodName) {
+      printf("%-6s %5.1f%%   %5s %6s %6d+%-6d %9" PRIu64 " %s.%s %s\n", buf,
+             per, "", "", method->numCalls[0], method->numCalls[1],
+             method->elapsedInclusive, className, methodName, signature);
+    } else {
+      /* Entries without a method name are printed by class name only. */
+      printf("%-6s %5.1f%%   %5s %6s %6d+%-6d %9" PRIu64 " %s\n", buf, per, "",
+             "", method->numCalls[0], method->numCalls[1],
+             method->elapsedInclusive, className);
+    }
+    double excl_per = 100.0 * method->topExclusive / method->elapsedInclusive;
+    printf("%6s %5s   %5.1f%% %6s %6s %6s %9" PRIu64 "\n", "", "", excl_per,
+           "excl", "", "", method->topExclusive);
+
+    /* Sort and print the children */
+    printInclusiveMethod(method, method->children[0], 0, 0);
+    if (method->children[1]) {
+      printf("               +++++++++++++++++++++++++\n");
+      printInclusiveMethod(method, method->children[1], 0, kIsRecursive);
+    }
+  }
+  if (gOptions.outputHtml) {
+    printf("</pre>\n");
+  }
+}
+
+/*
+ * Builds traceData->classes / traceData->numClasses from pMethods.
+ *
+ * pMethods is sorted in place with compareClassNames; entries without a
+ * method name are ignored.  Each run of entries sharing a className becomes
+ * one ClassEntry holding the class name, the number of methods in the run and
+ * a freshly allocated array of pointers to those methods.  When no class is
+ * found, traceData->classes is set to nullptr.
+ */
+void createClassList(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods) {
+  /* Bring the methods of one class next to each other. */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareClassNames);
+
+  /* Pass 1: count the runs of equal class names and remember the first name. */
+  const char* runName = "";
+  const char* firstName = nullptr;
+  int32_t classCount = 0;
+  for (int32_t idx = 0; idx < numMethods; ++idx) {
+    const MethodEntry* entry = pMethods[idx];
+    if (entry->methodName == nullptr) continue;
+    if (strcmp(entry->className, runName) == 0) continue;
+
+    if (firstName == nullptr) {
+      firstName = entry->className;
+    }
+    runName = entry->className;
+    ++classCount;
+  }
+  traceData->numClasses = classCount;
+
+  if (classCount == 0) {
+    traceData->classes = nullptr;
+    return;
+  }
+
+  /* One zero-initialized ClassEntry per unique class name. */
+  ClassEntry* classes = new ClassEntry[classCount];
+  memset(classes, 0, sizeof(ClassEntry) * classCount);
+  traceData->classes = classes;
+
+  /* Pass 2: record each class name and the length of its run. */
+  ClassEntry* cursor = classes;
+  runName = firstName;
+  cursor->className = firstName;
+  int32_t runLength = 0;
+  for (int32_t idx = 0; idx < numMethods; ++idx) {
+    const MethodEntry* entry = pMethods[idx];
+    if (entry->methodName == nullptr) continue;
+
+    if (strcmp(entry->className, runName) != 0) {
+      /* A new run starts: close the previous one and move on. */
+      cursor->numMethods = runLength;
+      ++cursor;
+      runName = entry->className;
+      cursor->className = runName;
+      runLength = 0;
+    }
+    ++runLength;
+  }
+  cursor->numMethods = runLength;
+
+  /* Pass 3: allocate and fill the per-class arrays of method pointers. */
+  cursor = nullptr;
+  runName = "";
+  int32_t slot = 0;
+  for (int32_t idx = 0; idx < numMethods; ++idx) {
+    MethodEntry* entry = pMethods[idx];
+    if (entry->methodName == nullptr) continue;
+
+    if (strcmp(entry->className, runName) != 0) {
+      runName = entry->className;
+      cursor = (cursor == nullptr) ? traceData->classes : cursor + 1;
+      cursor->methods = new MethodEntry*[cursor->numMethods];
+      slot = 0;
+    }
+    cursor->methods[slot++] = entry;
+  }
+}
+
+/* Prints "buf" padded with html non-breaking spaces ("&nbsp;") so that the
+ * field is at least |width| characters wide.  A positive width puts the
+ * padding in front of the text; a negative width puts it after the text.
+ * Text that is already wide enough is printed unchanged.
+ */
+void printHtmlField(char* buf, int32_t width) {
+  const bool padAfter = width < 0;
+  if (padAfter) {
+    width = -width;
+  }
+
+  int32_t textLen = strlen(buf);
+  int32_t padding = width - textLen;
+  if (padding <= 0) {
+    printf("%s", buf);
+    return;
+  }
+
+  if (padAfter) {
+    printf("%s", buf);
+  }
+  for (int32_t remaining = padding; remaining > 0; --remaining) {
+    printf("&nbsp;");
+  }
+  if (!padAfter) {
+    printf("%s", buf);
+  }
+}
+
+/*
+ * Prints the per-class profile section to stdout.
+ *
+ * For every entry of traceData->classes the exclusive time and call counts
+ * of its methods are summed into the ClassEntry, and the class's methods are
+ * sorted with compareElapsedExclusive.  The classes are then listed in the
+ * order given by compareClassExclusive, each followed by its methods, until
+ * the first class with zero exclusive time.  Percentages of a class are
+ * relative to sumThreadTime; percentages of a method are relative to its
+ * class.  Output is html or plain text depending on gOptions.outputHtml.
+ */
+void printClassProfiles(TraceData* traceData, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (gOptions.outputHtml) {
+    printf("<a name=\"class\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  if (traceData->numClasses == 0) {
+    printf("\nNo classes.\n");
+    if (gOptions.outputHtml) {
+      printf("<br><br>\n");
+    }
+    return;
+  }
+
+  printf("\nExclusive elapsed time for each class, summed over all the methods\n");
+  printf("in the class.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+
+  /* For each class, sum the exclusive times in all of the methods
+   * in that class.  Also sum the number of method calls.  Also
+   * sort the methods so the most expensive appear at the top.
+   */
+  ClassEntry* pClass = traceData->classes;
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii, ++pClass) {
+    // printf("%s %d methods\n", pClass->className, pClass->numMethods);
+    int32_t numMethods = pClass->numMethods;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pClass->methods[jj];
+      pClass->elapsedExclusive += method->elapsedExclusive;
+      pClass->numCalls[0] += method->numCalls[0];
+      pClass->numCalls[1] += method->numCalls[1];
+    }
+
+    /* Sort the methods into decreasing order of exclusive time */
+    qsort(pClass->methods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+  }
+
+  /* Allocate an array of pointers to the classes for more efficient sorting.
+   * The array is only needed inside this function and is released at the end.
+   */
+  ClassEntry** pClasses = new ClassEntry*[traceData->numClasses];
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii)
+    pClasses[ii] = &traceData->classes[ii];
+
+  /* Sort the classes into decreasing order of exclusive time */
+  qsort(pClasses, traceData->numClasses, sizeof(ClassEntry*), compareClassExclusive);
+
+  if (gOptions.outputHtml) {
+    printf(
+        "<div class=\"header\"><span "
+        "class=\"parent\">&nbsp;</span>&nbsp;&nbsp;&nbsp;");
+    printf("Cycles %%/total Cumul.%% &nbsp;Calls+Recur&nbsp; Class</div>\n");
+  } else {
+    printf("   Cycles %%/total Cumul.%%  Calls+Recur  Class\n");
+  }
+
+  double sum = 0;
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < traceData->numClasses; ++ii) {
+    /* Stop at the first class with zero cycles; the list is sorted. */
+    pClass = pClasses[ii];
+    if (pClass->elapsedExclusive == 0) break;
+
+    sum += pClass->elapsedExclusive;
+    double per = 100.0 * pClass->elapsedExclusive / total;
+    double sum_per = 100.0 * sum / total;
+    const char* className = pClass->className;
+    if (gOptions.outputHtml) {
+      char buf[80];
+
+      className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+      printf(
+          "<div class=\"link\" onClick=\"javascript:toggle('d%d')\" "
+          "onMouseOver=\"javascript:onMouseOver(this)\" "
+          "onMouseOut=\"javascript:onMouseOut(this)\"><span class=\"parent\" "
+          "id=\"xd%d\">+</span>",
+          ii, ii);
+      snprintf(buf, sizeof(buf), "%" PRIu64, pClass->elapsedExclusive);
+      printHtmlField(buf, 9);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%.1f", per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%.1f", sum_per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%d", pClass->numCalls[0]);
+      printHtmlField(buf, 6);
+      printf("+");
+      snprintf(buf, sizeof(buf), "%d", pClass->numCalls[1]);
+      printHtmlField(buf, -6);
+      printf(" ");
+      printf("%s", className);
+      printf("</div>\n");
+      printf("<div class=\"parent\" id=\"d%d\">\n", ii);
+    } else {
+      printf("---------------------------------------------\n");
+      printf("%9" PRIu64 " %7.1f %7.1f %6d+%-6d %s\n", pClass->elapsedExclusive,
+             per, sum_per, pClass->numCalls[0], pClass->numCalls[1], className);
+    }
+
+    /* List the methods of this class; percentages are relative to the class. */
+    int32_t numMethods = pClass->numMethods;
+    double classExclusive = pClass->elapsedExclusive;
+    double sumMethods = 0;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pClass->methods[jj];
+      const char* methodName = method->methodName;
+      const char* signature = method->signature;
+      per = 100.0 * method->elapsedExclusive / classExclusive;
+      sumMethods += method->elapsedExclusive;
+      sum_per = 100.0 * sumMethods / classExclusive;
+      if (gOptions.outputHtml) {
+        char buf[80];
+
+        methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+        signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+        printf("<div class=\"leaf\"><span class=\"leaf\">&nbsp;</span>");
+        snprintf(buf, sizeof(buf), "%" PRIu64, method->elapsedExclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%" PRIu64, method->elapsedInclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%.1f", per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%.1f", sum_per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%d", method->numCalls[0]);
+        printHtmlField(buf, 6);
+        printf("+");
+        snprintf(buf, sizeof(buf), "%d", method->numCalls[1]);
+        printHtmlField(buf, -6);
+        printf("&nbsp;");
+        printf("<a href=\"#m%d\">[%d]</a>&nbsp;%s&nbsp;%s", method->index,
+               method->index, methodName, signature);
+        printf("</div>\n");
+      } else {
+        printf("%9" PRIu64 " %9" PRIu64 " %7.1f %7.1f %6d+%-6d [%d] %s %s\n",
+               method->elapsedExclusive, method->elapsedInclusive, per, sum_per,
+               method->numCalls[0], method->numCalls[1], method->index,
+               methodName, signature);
+      }
+    }
+    if (gOptions.outputHtml) {
+      printf("</div>\n");
+    }
+  }
+
+  /* The pointer array was only a sorting aid; the ClassEntry objects
+   * themselves stay owned by traceData.
+   */
+  delete[] pClasses;
+}
+
+/*
+ * Builds traceData->uniqueMethods / traceData->numUniqueMethods from pMethods.
+ *
+ * pMethods is sorted in place with compareMethodNames; entries without a
+ * method name are ignored.  Each run of entries sharing a methodName (class
+ * and signature are not considered) becomes one UniqueMethodEntry holding the
+ * number of methods in the run and a freshly allocated array of pointers to
+ * them.  When there is no named method, traceData->uniqueMethods is set to
+ * nullptr, mirroring what createClassList does for traceData->classes.
+ */
+void createUniqueMethodList(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods) {
+  /* Sort the methods into alphabetical order of method names
+   * to find the unique method names.
+   */
+  qsort(pMethods, numMethods, sizeof(MethodEntry*), compareMethodNames);
+
+  /* Count the number of unique method names, ignoring class and signature. */
+  const char* currentMethodName = "";
+  traceData->numUniqueMethods = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      traceData->numUniqueMethods += 1;
+      currentMethodName = pMethods[ii]->methodName;
+    }
+  }
+  if (traceData->numUniqueMethods == 0) {
+    /* Leave the list in a defined state instead of whatever was there. */
+    traceData->uniqueMethods = nullptr;
+    return;
+  }
+
+  /* Allocate space for pointers to all of the unique methods */
+  traceData->uniqueMethods = new UniqueMethodEntry[traceData->numUniqueMethods];
+
+  /* Initialize the uniqueMethods array and record the length of each run */
+  memset(traceData->uniqueMethods, 0, sizeof(UniqueMethodEntry) * traceData->numUniqueMethods);
+  UniqueMethodEntry* pUnique = traceData->uniqueMethods;
+  currentMethodName = nullptr;
+  int32_t prevNumMethods = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    /* The first named method opens the first run. */
+    if (currentMethodName == nullptr) currentMethodName = pMethods[ii]->methodName;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      currentMethodName = pMethods[ii]->methodName;
+      pUnique->numMethods = prevNumMethods;
+      pUnique++;
+      prevNumMethods = 0;
+    }
+    prevNumMethods += 1;
+  }
+  pUnique->numMethods = prevNumMethods;
+
+  /* Create the array of MethodEntry pointers for each unique method */
+  pUnique = nullptr;
+  currentMethodName = "";
+  int32_t nextMethod = 0;
+  for (int32_t ii = 0; ii < numMethods; ++ii) {
+    if (pMethods[ii]->methodName == nullptr) continue;
+    if (strcmp(pMethods[ii]->methodName, currentMethodName) != 0) {
+      currentMethodName = pMethods[ii]->methodName;
+      if (pUnique == nullptr)
+        pUnique = traceData->uniqueMethods;
+      else
+        pUnique++;
+      /* Allocate space for the methods array */
+      pUnique->methods = new MethodEntry*[pUnique->numMethods];
+      nextMethod = 0;
+    }
+    pUnique->methods[nextMethod++] = pMethods[ii];
+  }
+}
+
+/*
+ * Prints the per-method-name profile section to stdout.
+ *
+ * For every entry of traceData->uniqueMethods the exclusive time and call
+ * counts of all methods sharing that name are summed into the
+ * UniqueMethodEntry, and the entry's methods are sorted with
+ * compareElapsedExclusive.  The unique methods are then listed in the order
+ * given by compareUniqueExclusive, each followed by the individual methods,
+ * until the first entry with zero exclusive time.  Nothing is printed when
+ * traceData->numUniqueMethods is zero.  Output is html or plain text
+ * depending on gOptions.outputHtml.
+ */
+void printMethodProfiles(TraceData* traceData, uint64_t sumThreadTime) {
+  char classBuf[HTML_BUFSIZE], methodBuf[HTML_BUFSIZE];
+  char signatureBuf[HTML_BUFSIZE];
+
+  if (traceData->numUniqueMethods == 0) return;
+
+  if (gOptions.outputHtml) {
+    printf("<a name=\"method\"></a>\n");
+    printf("<hr>\n");
+    outputNavigationBar();
+  } else {
+    printf("\n%s\n", profileSeparator);
+  }
+
+  printf("\nExclusive elapsed time for each method, summed over all the classes\n");
+  printf("that contain a method with the same name.\n\n");
+  if (gOptions.outputHtml) {
+    printf("<br><br>\n");
+  }
+
+  /* For each unique method, sum the exclusive times in all of the methods
+   * with the same name.  Also sum the number of method calls.  Also
+   * sort the methods so the most expensive appear at the top.
+   */
+  UniqueMethodEntry* pUnique = traceData->uniqueMethods;
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii, ++pUnique) {
+    int32_t numMethods = pUnique->numMethods;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pUnique->methods[jj];
+      pUnique->elapsedExclusive += method->elapsedExclusive;
+      pUnique->numCalls[0] += method->numCalls[0];
+      pUnique->numCalls[1] += method->numCalls[1];
+    }
+
+    /* Sort the methods into decreasing order of exclusive time */
+    qsort(pUnique->methods, numMethods, sizeof(MethodEntry*), compareElapsedExclusive);
+  }
+
+  /* Allocate an array of pointers to the methods for more efficient sorting.
+   * The array is only needed inside this function and is released at the end.
+   */
+  UniqueMethodEntry** pUniqueMethods = new UniqueMethodEntry*[traceData->numUniqueMethods];
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii)
+    pUniqueMethods[ii] = &traceData->uniqueMethods[ii];
+
+  /* Sort the methods into decreasing order of exclusive time */
+  qsort(pUniqueMethods, traceData->numUniqueMethods, sizeof(UniqueMethodEntry*),
+        compareUniqueExclusive);
+
+  if (gOptions.outputHtml) {
+    printf(
+        "<div class=\"header\"><span "
+        "class=\"parent\">&nbsp;</span>&nbsp;&nbsp;&nbsp;");
+    printf("Cycles %%/total Cumul.%% &nbsp;Calls+Recur&nbsp; Method</div>\n");
+  } else {
+    printf("   Cycles %%/total Cumul.%%  Calls+Recur  Method\n");
+  }
+
+  double sum = 0;
+  double total = sumThreadTime;
+  for (int32_t ii = 0; ii < traceData->numUniqueMethods; ++ii) {
+    /* Stop at the first method with zero cycles; the list is sorted. */
+    pUnique = pUniqueMethods[ii];
+    if (pUnique->elapsedExclusive == 0) break;
+
+    sum += pUnique->elapsedExclusive;
+    double per = 100.0 * pUnique->elapsedExclusive / total;
+    double sum_per = 100.0 * sum / total;
+    /* All methods of the entry share the name, so take it from the first. */
+    const char* methodName = pUnique->methods[0]->methodName;
+    if (gOptions.outputHtml) {
+      char buf[80];
+
+      methodName = htmlEscape(methodName, methodBuf, HTML_BUFSIZE);
+      printf(
+          "<div class=\"link\" onClick=\"javascript:toggle('e%d')\" "
+          "onMouseOver=\"javascript:onMouseOver(this)\" "
+          "onMouseOut=\"javascript:onMouseOut(this)\"><span class=\"parent\" "
+          "id=\"xe%d\">+</span>",
+          ii, ii);
+      snprintf(buf, sizeof(buf), "%" PRIu64, pUnique->elapsedExclusive);
+      printHtmlField(buf, 9);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%.1f", per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%.1f", sum_per);
+      printHtmlField(buf, 7);
+      printf(" ");
+      snprintf(buf, sizeof(buf), "%d", pUnique->numCalls[0]);
+      printHtmlField(buf, 6);
+      printf("+");
+      snprintf(buf, sizeof(buf), "%d", pUnique->numCalls[1]);
+      printHtmlField(buf, -6);
+      printf(" ");
+      printf("%s", methodName);
+      printf("</div>\n");
+      printf("<div class=\"parent\" id=\"e%d\">\n", ii);
+    } else {
+      printf("---------------------------------------------\n");
+      printf("%9" PRIu64 " %7.1f %7.1f %6d+%-6d %s\n",
+             pUnique->elapsedExclusive, per, sum_per, pUnique->numCalls[0],
+             pUnique->numCalls[1], methodName);
+    }
+
+    /* List the individual methods; percentages are relative to the entry. */
+    int32_t numMethods = pUnique->numMethods;
+    double methodExclusive = pUnique->elapsedExclusive;
+    double sumMethods = 0;
+    for (int32_t jj = 0; jj < numMethods; ++jj) {
+      MethodEntry* method = pUnique->methods[jj];
+      const char* className = method->className;
+      const char* signature = method->signature;
+      per = 100.0 * method->elapsedExclusive / methodExclusive;
+      sumMethods += method->elapsedExclusive;
+      sum_per = 100.0 * sumMethods / methodExclusive;
+      if (gOptions.outputHtml) {
+        char buf[80];
+
+        className = htmlEscape(className, classBuf, HTML_BUFSIZE);
+        signature = htmlEscape(signature, signatureBuf, HTML_BUFSIZE);
+        printf("<div class=\"leaf\"><span class=\"leaf\">&nbsp;</span>");
+        snprintf(buf, sizeof(buf), "%" PRIu64, method->elapsedExclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%" PRIu64, method->elapsedInclusive);
+        printHtmlField(buf, 9);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%.1f", per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%.1f", sum_per);
+        printHtmlField(buf, 7);
+        printf("&nbsp;");
+        snprintf(buf, sizeof(buf), "%d", method->numCalls[0]);
+        printHtmlField(buf, 6);
+        printf("+");
+        snprintf(buf, sizeof(buf), "%d", method->numCalls[1]);
+        printHtmlField(buf, -6);
+        printf("&nbsp;");
+        printf("<a href=\"#m%d\">[%d]</a>&nbsp;%s.%s&nbsp;%s", method->index,
+               method->index, className, methodName, signature);
+        printf("</div>\n");
+      } else {
+        printf("%9" PRIu64 " %9" PRIu64 " %7.1f %7.1f %6d+%-6d [%d] %s.%s %s\n",
+               method->elapsedExclusive, method->elapsedInclusive, per, sum_per,
+               method->numCalls[0], method->numCalls[1], method->index,
+               className, methodName, signature);
+      }
+    }
+    if (gOptions.outputHtml) {
+      printf("</div>\n");
+    }
+  }
+
+  /* The pointer array was only a sorting aid; the UniqueMethodEntry objects
+   * themselves stay owned by traceData.
+   */
+  delete[] pUniqueMethods;
+}
+
+/*
+ * Read the key and data sections of the trace file "traceFileName" and
+ * return the resulting DataKeys with per-method timing filled in.
+ *
+ * Every data record is replayed against a per-thread call stack kept in
+ * traceData->stacks: method entries are pushed, method exits are popped and
+ * the elapsed time is attributed to the method and its caller.  Calls still
+ * on a stack when the trace ends are accounted for as if they exited at the
+ * thread's last event.
+ *
+ * Returns nullptr when the file cannot be opened or parseKeys() fails.  When
+ * parseDataHeader() fails the keys are returned without any record having
+ * been processed and *threadTime is left untouched.  Otherwise, if
+ * threadTime is not nullptr, it receives the sum over all threads of the
+ * time between their first and last event.
+ *
+ * The process is terminated with exit(1) on a thread id that does not fit
+ * into traceData->stacks, on a call stack deeper than MAX_STACK_DEPTH, and
+ * on a method exit that does not match the top of the stack.
+ */
+DataKeys* parseDataKeys(TraceData* traceData, const char* traceFileName, uint64_t* threadTime) {
+  MethodEntry* caller;
+
+  FILE* dataFp = fopen(traceFileName, "rb");
+  if (dataFp == nullptr) return nullptr;
+
+  DataKeys* dataKeys = parseKeys(dataFp, 0);
+  if (dataKeys == nullptr) {
+    fclose(dataFp);
+    return nullptr;
+  }
+
+  DataHeader dataHeader;
+  if (parseDataHeader(dataFp, &dataHeader) < 0) {
+    fclose(dataFp);
+    return dataKeys;
+  }
+
+#if 0
+  FILE* dumpStream = fopen("debug", "w");
+#endif
+  while (1) {
+    /*
+     * Extract values from file.
+     */
+    int32_t threadId;
+    uint32_t methodVal;
+    uint64_t currentTime;
+    if (readDataRecord(dataFp, &dataHeader, &threadId, &methodVal, &currentTime))
+      break;
+
+    int32_t action = METHOD_ACTION(methodVal);
+    int64_t methodId = METHOD_ID(methodVal);
+
+    /* The thread id comes straight from the trace file.  Refuse ids that
+     * would index outside traceData->stacks, which the clean-up loop below
+     * walks as an array of MAX_THREADS entries.
+     */
+    if (threadId < 0 || threadId >= MAX_THREADS) {
+      fprintf(stderr, "Thread id %d is out of range\n", threadId);
+      exit(1);
+    }
+
+    /* Get the call stack for this thread */
+    CallStack* pStack = traceData->stacks[threadId];
+
+    /* If there is no call stack yet for this thread, then allocate one */
+    if (pStack == nullptr) {
+      pStack = new CallStack();
+      pStack->top = 0;
+      pStack->lastEventTime = currentTime;
+      pStack->threadStartTime = currentTime;
+      traceData->stacks[threadId] = pStack;
+    }
+
+    /* Lookup the current method; unknown ids share one placeholder entry */
+    MethodEntry* method = lookupMethod(dataKeys, methodId);
+    if (method == nullptr) method = &dataKeys->methods[UNKNOWN_INDEX];
+
+#if 0
+    if (method->methodName) {
+      fprintf(dumpStream, "%2d %-8llu %d %8llu r %d c %d %s.%s %s\n",
+              threadId, currentTime, action, pStack->threadStartTime,
+              method->recursiveEntries,
+              pStack->top, method->className, method->methodName,
+              method->signature);
+    } else {
+      fprintf(dumpStream, "%2d %-8llu %d %8llu r %d c %d %s\n",
+              threadId, currentTime, action, pStack->threadStartTime,
+              method->recursiveEntries,
+              pStack->top, method->className);
+    }
+#endif
+
+    if (action == METHOD_TRACE_ENTER) {
+      /* This is a method entry */
+      if (pStack->top >= MAX_STACK_DEPTH) {
+        fprintf(stderr, "Stack overflow (exceeded %d frames)\n",
+                MAX_STACK_DEPTH);
+        exit(1);
+      }
+
+      /* Get the caller method */
+      if (pStack->top >= 1)
+        caller = pStack->calls[pStack->top - 1].method;
+      else
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      countRecursiveEntries(pStack, pStack->top, caller);
+      /* The time since the previous event was spent in the caller itself */
+      caller->elapsedExclusive += currentTime - pStack->lastEventTime;
+#if 0
+      if (caller->elapsedExclusive > 10000000)
+        fprintf(dumpStream, "%llu current %llu last %llu diff %llu\n",
+                caller->elapsedExclusive, currentTime,
+                pStack->lastEventTime,
+                currentTime - pStack->lastEventTime);
+#endif
+      if (caller->recursiveEntries <= 1) {
+        caller->topExclusive += currentTime - pStack->lastEventTime;
+      }
+
+      /* Push the method on the stack for this thread */
+      pStack->calls[pStack->top].method = method;
+      pStack->calls[pStack->top++].entryTime = currentTime;
+    } else {
+      /* This is a method exit */
+      uint64_t entryTime = 0;
+
+      /* Pop the method off the stack for this thread.  An exit on an empty
+       * stack is accepted and keeps entryTime at 0.
+       */
+      if (pStack->top > 0) {
+        pStack->top -= 1;
+        entryTime = pStack->calls[pStack->top].entryTime;
+        if (method != pStack->calls[pStack->top].method) {
+          if (method->methodName) {
+            fprintf(stderr, "Exit from method %s.%s %s does not match stack:\n",
+                    method->className, method->methodName, method->signature);
+          } else {
+            fprintf(stderr, "Exit from method %s does not match stack:\n",
+                    method->className);
+          }
+          stackDump(pStack, pStack->top + 1);
+          exit(1);
+        }
+      }
+
+      /* Get the caller method */
+      if (pStack->top >= 1)
+        caller = pStack->calls[pStack->top - 1].method;
+      else
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      countRecursiveEntries(pStack, pStack->top, caller);
+      countRecursiveEntries(pStack, pStack->top, method);
+      uint64_t elapsed = currentTime - entryTime;
+      addInclusiveTime(caller, method, elapsed);
+      method->elapsedExclusive += currentTime - pStack->lastEventTime;
+      if (method->recursiveEntries == 0) {
+        method->topExclusive += currentTime - pStack->lastEventTime;
+      }
+    }
+    /* Remember the time of the last entry or exit event */
+    pStack->lastEventTime = currentTime;
+  }
+
+  /* If we have calls on the stack when the trace ends, then clean
+   * up the stack and add time to the callers by pretending that we
+   * are exiting from their methods now.
+   */
+  uint64_t sumThreadTime = 0;
+  for (int32_t threadId = 0; threadId < MAX_THREADS; ++threadId) {
+    CallStack* pStack = traceData->stacks[threadId];
+
+    /* If this thread never existed, then continue with next thread */
+    if (pStack == nullptr) continue;
+
+    /* Also, add up the time taken by all of the threads */
+    sumThreadTime += pStack->lastEventTime - pStack->threadStartTime;
+
+    for (int32_t ii = 0; ii < pStack->top; ++ii) {
+      if (ii == 0)
+        caller = &dataKeys->methods[TOPLEVEL_INDEX];
+      else
+        caller = pStack->calls[ii - 1].method;
+      MethodEntry* method = pStack->calls[ii].method;
+      countRecursiveEntries(pStack, ii, caller);
+      countRecursiveEntries(pStack, ii, method);
+
+      uint64_t entryTime = pStack->calls[ii].entryTime;
+      uint64_t elapsed = pStack->lastEventTime - entryTime;
+      addInclusiveTime(caller, method, elapsed);
+    }
+  }
+  /* The top-level pseudo method covers the whole run of every thread */
+  caller = &dataKeys->methods[TOPLEVEL_INDEX];
+  caller->elapsedInclusive = sumThreadTime;
+
+#if 0
+  fclose(dumpStream);
+#endif
+
+  if (threadTime != nullptr) {
+    *threadTime = sumThreadTime;
+  }
+
+  fclose(dataFp);
+  return dataKeys;
+}
+
+/*
+ * Returns a newly allocated array holding one pointer to each of the
+ * dataKeys->numMethods entries of dataKeys->methods, in their current order.
+ *
+ * Callers sort this pointer array rather than the MethodEntry structs
+ * themselves because other lists keep pointers to those structs.
+ */
+MethodEntry** parseMethodEntries(DataKeys* dataKeys) {
+  MethodEntry** table = new MethodEntry*[dataKeys->numMethods];
+
+  int32_t idx = 0;
+  while (idx < dataKeys->numMethods) {
+    table[idx] = dataKeys->methods + idx;
+    ++idx;
+  }
+
+  return table;
+}
+
+/*
+ * Produce a function profile from the following methods.
+ *
+ * Prints, in this order, the exclusive, inclusive, per-class and
+ * per-method-name sections to stdout.  In html mode (gOptions.outputHtml)
+ * the output is wrapped in htmlHeader / htmlFooter and preceded by a table
+ * of contents.  sumThreadTime is passed through to every print function.
+ *
+ * The order of the calls matters: the inclusive profile, createClassList()
+ * and createUniqueMethodList() each re-sort pMethods in place.
+ */
+void profileTrace(TraceData* traceData, MethodEntry** pMethods, int32_t numMethods,
+                  uint64_t sumThreadTime) {
+  /* Print the html header, if necessary */
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    outputTableOfContents();
+  }
+
+  printExclusiveProfile(pMethods, numMethods, sumThreadTime);
+  printInclusiveProfile(pMethods, numMethods, sumThreadTime);
+
+  /* The class list must exist before the class profile can be printed */
+  createClassList(traceData, pMethods, numMethods);
+  printClassProfiles(traceData, sumThreadTime);
+
+  /* Likewise for the list of unique method names */
+  createUniqueMethodList(traceData, pMethods, numMethods);
+  printMethodProfiles(traceData, sumThreadTime);
+
+  if (gOptions.outputHtml) {
+    printf("%s", htmlFooter);
+  }
+}
+
+/*
+ * qsort-style comparison of two MethodEntry pointers (a and b each point to
+ * a MethodEntry*).  Entries are ordered by method name, then signature, then
+ * class name.  If either entry has no method name the decision is delegated
+ * to compareClassNames().
+ */
+int32_t compareMethodNamesForDiff(const void* a, const void* b) {
+  const MethodEntry* lhs = *static_cast<const MethodEntry* const*>(a);
+  const MethodEntry* rhs = *static_cast<const MethodEntry* const*>(b);
+
+  const bool bothNamed = lhs->methodName != nullptr && rhs->methodName != nullptr;
+  if (!bothNamed) {
+    return compareClassNames(a, b);
+  }
+
+  const int32_t byName = strcmp(lhs->methodName, rhs->methodName);
+  if (byName != 0) {
+    return byName;
+  }
+
+  const int32_t bySignature = strcmp(lhs->signature, rhs->signature);
+  if (bySignature != 0) {
+    return bySignature;
+  }
+
+  return strcmp(lhs->className, rhs->className);
+}
+
+/*
+ * Linear search through the first "size" slots of "methods" for an entry
+ * that compareMethodNamesForDiff() reports as equal to "matchThis".  Slots
+ * holding nullptr are skipped.  Returns the index of the first match, or -1
+ * when there is none.
+ */
+int32_t findMatch(MethodEntry** methods, int32_t size, MethodEntry* matchThis) {
+  for (int32_t idx = 0; idx < size; ++idx) {
+    MethodEntry* candidate = methods[idx];
+    if (candidate == nullptr) {
+      continue;
+    }
+
+    // The comparator takes pointers to the MethodEntry pointers.
+    if (compareMethodNamesForDiff(&candidate, &matchThis) == 0) {
+      return idx;
+    }
+  }
+
+  return -1;
+}
+
+/*
+ * qsort-style comparison of two DiffEntry objects that orders them by
+ * decreasing differenceExclusive.
+ */
+int32_t compareDiffEntriesExculsive(const void* a, const void* b) {
+  const DiffEntry* lhs = static_cast<const DiffEntry*>(a);
+  const DiffEntry* rhs = static_cast<const DiffEntry*>(b);
+
+  // The larger difference sorts first.
+  if (lhs->differenceExclusive < rhs->differenceExclusive) return 1;
+  if (lhs->differenceExclusive > rhs->differenceExclusive) return -1;
+  return 0;
+}
+
+/*
+ * qsort-style comparison of two DiffEntry objects that orders them by
+ * decreasing differenceInclusive.
+ */
+int32_t compareDiffEntriesInculsive(const void* a, const void* b) {
+  const DiffEntry* lhs = static_cast<const DiffEntry*>(a);
+  const DiffEntry* rhs = static_cast<const DiffEntry*>(b);
+
+  // The larger difference sorts first.
+  if (lhs->differenceInclusive < rhs->differenceInclusive) return 1;
+  if (lhs->differenceInclusive > rhs->differenceInclusive) return -1;
+  return 0;
+}
+
+/*
+ * Prints one line for "method": "<class>.<method>", its exclusive time, its
+ * inclusive time and numCalls[0].  In html mode (gOptions.outputHtml) the
+ * values are emitted as the cells of a table row.  The names are passed
+ * through htmlEscape() in both modes.
+ */
+void printMissingMethod(MethodEntry* method) {
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+  const bool html = gOptions.outputHtml;
+
+  const char* escapedClass = htmlEscape(method->className, classBuf, HTML_BUFSIZE);
+  const char* escapedMethod = htmlEscape(method->methodName, methodBuf, HTML_BUFSIZE);
+
+  if (html) {
+    printf("<tr><td>\n");
+  }
+  printf("%s.%s ", escapedClass, escapedMethod);
+
+  if (html) {
+    printf("</td><td>");
+  }
+  printf("%" PRIu64 " ", method->elapsedExclusive);
+
+  if (html) {
+    printf("</td><td>");
+  }
+  printf("%" PRIu64 " ", method->elapsedInclusive);
+
+  if (html) {
+    printf("</td><td>");
+  }
+  printf("%d\n", method->numCalls[0]);
+
+  // NOTE(review): this opens another cell instead of closing the row with
+  // "</td></tr>" -- confirm against the caller whether that is intended.
+  if (html) {
+    printf("</td><td>\n");
+  }
+}
+
+void createDiff(DataKeys* d1, DataKeys* d2) {
+  MethodEntry** methods1 = parseMethodEntries(d1);
+  MethodEntry** methods2 = parseMethodEntries(d2);
+
+  // sort and assign the indicies
+  qsort(methods1, d1->numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+  for (int32_t i = 0; i < d1->numMethods; ++i) {
+    methods1[i]->index = i;
+  }
+
+  qsort(methods2, d2->numMethods, sizeof(MethodEntry*), compareElapsedInclusive);
+  for (int32_t i = 0; i < d2->numMethods; ++i) {
+    methods2[i]->index = i;
+  }
+
+  int32_t max = (d1->numMethods < d2->numMethods) ? d2->numMethods : d1->numMethods;
+  max++;
+  DiffEntry* diffs = new DiffEntry[max];
+  memset(diffs, 0, max * sizeof(DiffEntry));
+  DiffEntry* ptr = diffs;
+
+  // printf("<br>d1->numMethods: %d d1->numMethods: %d<br>\n",
+  //        d1->numMethods, d2->numMethods);
+
+  int32_t matches = 0;
+
+  for (int32_t i = 0; i < d1->numMethods; i++) {
+    int32_t match = findMatch(methods2, d2->numMethods, methods1[i]);
+    if (match >= 0) {
+      ptr->method1 = methods1[i];
+      ptr->method2 = methods2[match];
+
+      uint64_t e1 = ptr->method1->elapsedExclusive;
+      uint64_t e2 = ptr->method2->elapsedExclusive;
+      if (e1 > 0) {
+        ptr->differenceExclusive = e2 - e1;
+        ptr->differenceExclusivePercentage = (static_cast<double>(e2) /
+                                              static_cast<double>(e1)) * 100.0;
+      }
+
+      uint64_t i1 = ptr->method1->elapsedInclusive;
+      uint64_t i2 = ptr->method2->elapsedInclusive;
+      if (i1 > 0) {
+        ptr->differenceInclusive = i2 - i1;
+        ptr->differenceInclusivePercentage = (static_cast<double>(i2) /
+                                              static_cast<double>(i1)) * 100.0;
+      }
+
+      // clear these out so we don't find them again and we know which ones
+      // we have left over
+      methods1[i] = nullptr;
+      methods2[match] = nullptr;
+      ptr++;
+
+      matches++;
+    }
+  }
+  ptr->method1 = nullptr;
+  ptr->method2 = nullptr;
+
+  qsort(diffs, matches, sizeof(DiffEntry), compareDiffEntriesExculsive);
+  ptr = diffs;
+
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    printf("<h3>Table of Contents</h3>\n");
+    printf("<ul>\n");
+    printf("<li><a href='#exclusive'>Exclusive</a>\n");
+    printf("<li><a href='#inclusive'>Inclusive</a>\n");
+    printf("</ul>\n");
+    printf("Run 1: %s<br>\n", gOptions.diffFileName);
+    printf("Run 2: %s<br>\n", gOptions.traceFileName);
+    printf("<a name=\"exclusive\"></a><h3 id=\"exclusive\">Exclusive</h3>\n");
+    printf(tableHeader, "exclusive_table");
+  }
+
+  char classBuf[HTML_BUFSIZE];
+  char methodBuf[HTML_BUFSIZE];
+  while (ptr->method1 != nullptr && ptr->method2 != nullptr) {
+    if (gOptions.outputHtml) printf("<tr><td>\n");
+
+    char* className = htmlEscape(ptr->method1->className, classBuf, HTML_BUFSIZE);
+    char* methodName = htmlEscape(ptr->method1->methodName, methodBuf, HTML_BUFSIZE);
+
+    printf("%s.%s ", className, methodName);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method1->elapsedExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method2->elapsedExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->differenceExclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%.2f\n", ptr->differenceExclusivePercentage);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method1->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method2->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td></tr>\n");
+
+    ptr++;
+  }
+
+  if (gOptions.outputHtml) printf("</table>\n");
+
+  if (gOptions.outputHtml) {
+    printf(htmlHeader, gOptions.sortableUrl);
+    printf("Run 1: %s<br>\n", gOptions.diffFileName);
+    printf("Run 2: %s<br>\n", gOptions.traceFileName);
+    printf("<a name=\"inclusive\"></a><h3 id=\"inculisve\">Inclusive</h3>\n");
+    printf(tableHeader, "inclusive_table");
+  }
+
+  qsort(diffs, matches, sizeof(DiffEntry), compareDiffEntriesInculsive);
+  ptr = diffs;
+
+  while (ptr->method1 != nullptr && ptr->method2 != nullptr) {
+    if (gOptions.outputHtml) printf("<tr><td>\n");
+
+    char* className = htmlEscape(ptr->method1->className, classBuf, HTML_BUFSIZE);
+    char* methodName = htmlEscape(ptr->method1->methodName, methodBuf, HTML_BUFSIZE);
+
+    printf("%s.%s ", className, methodName);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method1->elapsedInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->method2->elapsedInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%" PRIu64 " ", ptr->differenceInclusive);
+    if (gOptions.outputHtml) printf("</td><td>");
+
+    printf("%.2f\n", ptr->differenceInclusivePercentage);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method1->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td><td>\n");
+
+    printf("%d\n", ptr->method2->numCalls[0]);
+    if (gOptions.outputHtml) printf("</td></tr>\n");
+
+    ptr++;
+  }
+
+  if (gOptions.outputHtml) {
+    printf("</table>\n");
+    printf("<h3>Run 1 methods not found in Run 2</h3>");
+    printf(tableHeaderMissing, "?");
+  }
+
+  for (int32_t i = 0; i < d1->numMethods; ++i) {
+    if (methods1[i] != nullptr) {
+      printMissingMethod(methods1[i]);
+    }
+  }
+
+  if (gOptions.outputHtml) {
+    printf("</table>\n");
+    printf("<h3>Run 2 methods not found in Run 1</h3>");
+    printf(tableHeaderMissing, "?");
+  }
+
+  for (int32_t i = 0; i < d2->numMethods; ++i) {
+    if (methods2[i] != nullptr) {
+      printMissingMethod(methods2[i]);
+    }
+  }
+
+  if (gOptions.outputHtml) printf("</body></html\n");
+}
+
+// Prints the copyright banner and the command-line help to stderr.
+// |program| is the executable name shown in the synopsis (main() passes argv[0]).
+// Always returns 2, which main() uses as the process exit status.
+int32_t usage(const char* program) {
+  fprintf(stderr, "Copyright (C) 2006 The Android Open Source Project\n\n");
+  // Keep the synopsis in sync with the getopt() string in parseOptions().
+  fprintf(stderr,
+          "usage: %s [-hko] [-s sortable] [-d trace-file-name] [-g outfile] "
+          "[-t threshold] trace-file-name\n",
+          program);
+  fprintf(stderr,
+          "  -d trace-file-name  - Diff with this trace\n"
+          "  -g outfile          - Write graph to 'outfile'\n"
+          "  -k                  - When writing a graph, keep the intermediate "
+          "DOT file\n"
+          "  -h                  - Turn on HTML output\n"
+          "  -o                  - Dump the dmtrace file instead of profiling\n"
+          "  -s sortable         - URL base to where the sortable javascript "
+          "file is located\n"
+          "  -t threshold        - Threshold percentage for including nodes in "
+          "the graph\n");
+  return 2;
+}
+
+// Fills gOptions from argv. Returns nonzero if getopt() rejects a flag or -t has no digits.
+int32_t parseOptions(int32_t argc, char** argv) {
+  char* end = nullptr;  // set by strtol() to the first unparsed character of the -t argument
+  for (int32_t opt; (opt = getopt(argc, argv, "d:hg:kos:t:")) != -1;) {
+    switch (opt) {
+      case 'd':
+        gOptions.diffFileName = optarg;
+        break;
+      case 'g':
+        gOptions.graphFileName = optarg;
+        break;
+      case 'k':
+        gOptions.keepDotFile = 1;
+        break;
+      case 'h':
+        gOptions.outputHtml = 1;
+        break;
+      case 'o':
+        gOptions.dump = 1;
+        break;
+      case 's':
+        gOptions.sortableUrl = optarg;
+        break;
+      case 't':
+        gOptions.threshold = static_cast<int32_t>(strtol(optarg, &end, 10));
+        if (end == optarg) return 1;  // no digits parsed, e.g. "-t x": report it, don't use 0
+        break;
+      default:
+        return 1;
+    }
+  }
+  return 0;
+}
+
+/*
+ * Entry point: dumps, profiles, or diffs the trace named on the command line.
+ * Returns usage()'s result on bad arguments, else 0; exits with 1 on an unreadable trace.
+ */
+int32_t main(int32_t argc, char** argv) {
+  gOptions.threshold = -1;
+
+  // Exactly one positional argument (the trace file) must follow the flags.
+  if (parseOptions(argc, argv) || argc - optind != 1) return usage(argv[0]);
+  gOptions.traceFileName = argv[optind];
+
+  // Still -1 if -t was not given; any value outside [0, 100) falls back to 20.
+  if (gOptions.threshold < 0 || 100 <= gOptions.threshold) {
+    gOptions.threshold = 20;
+  }
+
+  if (gOptions.dump) {
+    dumpTrace();
+    return 0;
+  }
+
+  uint64_t sumThreadTime = 0;
+  TraceData data1;
+  DataKeys* dataKeys = parseDataKeys(&data1, gOptions.traceFileName, &sumThreadTime);
+  if (dataKeys == nullptr) {
+    fprintf(stderr, "Cannot read \"%s\".\n", gOptions.traceFileName);
+    exit(1);
+  }
+
+  if (gOptions.diffFileName != nullptr) {
+    // Zero-initialized like sumThreadTime, in case parseDataKeys() accumulates into it.
+    uint64_t sum2 = 0;
+    TraceData data2;
+    DataKeys* d2 = parseDataKeys(&data2, gOptions.diffFileName, &sum2);
+    if (d2 == nullptr) {
+      fprintf(stderr, "Cannot read \"%s\".\n", gOptions.diffFileName);
+      exit(1);
+    }
+
+    // createDiff() reports the -d trace as "Run 1" and the positional trace as "Run 2".
+    createDiff(d2, dataKeys);
+    freeDataKeys(d2);
+  } else {
+    MethodEntry** methods = parseMethodEntries(dataKeys);
+    profileTrace(&data1, methods, dataKeys->numMethods, sumThreadTime);
+    if (gOptions.graphFileName != nullptr) {
+      createInclusiveProfileGraphNew(dataKeys);
+    }
+    free(methods);
+  }
+
+  freeDataKeys(dataKeys);
+  return 0;
+}