Merge "ART: Make iget receiver mismatch hard verifier error"
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 4dd8024..b7e000a 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -110,7 +110,7 @@
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForMethodsInternal(
+static std::vector<uint8_t> WriteDebugElfFileForMethodsInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
@@ -126,14 +126,10 @@
                  false /* write_oat_patches */);
   builder->End();
   CHECK(builder->Good());
-  // Make a copy of the buffer.  We want to shrink it anyway.
-  uint8_t* result = new uint8_t[buffer.size()];
-  CHECK(result != nullptr);
-  memcpy(result, buffer.data(), buffer.size());
-  return ArrayRef<const uint8_t>(result, buffer.size());
+  return buffer;
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> WriteDebugElfFileForMethods(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos) {
@@ -145,7 +141,7 @@
 }
 
 template <typename ElfTypes>
-static ArrayRef<const uint8_t> WriteDebugElfFileForClassesInternal(
+static std::vector<uint8_t> WriteDebugElfFileForClassesInternal(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<mirror::Class*>& types)
@@ -164,16 +160,12 @@
 
   builder->End();
   CHECK(builder->Good());
-  // Make a copy of the buffer.  We want to shrink it anyway.
-  uint8_t* result = new uint8_t[buffer.size()];
-  CHECK(result != nullptr);
-  memcpy(result, buffer.data(), buffer.size());
-  return ArrayRef<const uint8_t>(result, buffer.size());
+  return buffer;
 }
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
-                                                    const InstructionSetFeatures* features,
-                                                    const ArrayRef<mirror::Class*>& types) {
+std::vector<uint8_t> WriteDebugElfFileForClasses(InstructionSet isa,
+                                                 const InstructionSetFeatures* features,
+                                                 const ArrayRef<mirror::Class*>& types) {
   if (Is64BitInstructionSet(isa)) {
     return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, features, types);
   } else {
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 736370e..6f52249 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -47,12 +47,12 @@
     size_t text_section_size,
     const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForMethods(
+std::vector<uint8_t> WriteDebugElfFileForMethods(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<const MethodDebugInfo>& method_infos);
 
-ArrayRef<const uint8_t> WriteDebugElfFileForClasses(
+std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
     const InstructionSetFeatures* features,
     const ArrayRef<mirror::Class*>& types)
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 5294068..be82956 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -416,23 +416,27 @@
                                 type ## _ENTRYPOINT_OFFSET(4, offset)); \
     }
 
-const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const {
   CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickGenericJniTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickGenericJniTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickGenericJniTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickImtConflictTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickImtConflictTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickImtConflictTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickResolutionTrampoline() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickResolutionTrampoline()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickResolutionTrampoline)
 }
 
-const std::vector<uint8_t>* CompilerDriver::CreateQuickToInterpreterBridge() const {
+std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickToInterpreterBridge()
+    const {
   CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickToInterpreterBridge)
 }
 #undef CREATE_TRAMPOLINE
@@ -1279,14 +1283,13 @@
   return IsImageClass(descriptor);
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
+bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                                      uint32_t type_idx) {
   bool result = false;
   if ((IsBootImage() &&
-       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) ||
+       IsImageClass(dex_cache->GetDexFile()->StringDataByIdx(
+           dex_cache->GetDexFile()->GetTypeId(type_idx).descriptor_idx_))) ||
       Runtime::Current()->UseJit()) {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-        soa.Self(), dex_file, false);
     mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
     result = (resolved_class != nullptr);
   }
@@ -1328,32 +1331,16 @@
   return result;
 }
 
-bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                                uint32_t type_idx,
-                                                bool* type_known_final, bool* type_known_abstract,
-                                                bool* equals_referrers_class) {
-  if (type_known_final != nullptr) {
-    *type_known_final = false;
-  }
-  if (type_known_abstract != nullptr) {
-    *type_known_abstract = false;
-  }
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = false;
-  }
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
+bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                                Handle<mirror::DexCache> dex_cache,
+                                                uint32_t type_idx) {
   // Get type from dex cache assuming it was populated by the verifier
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
     return false;  // Unknown class needs access checks.
   }
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  if (equals_referrers_class != nullptr) {
-    *equals_referrers_class = (method_id.class_idx_ == type_idx);
-  }
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
   bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
   if (!is_accessible) {
     mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
@@ -1367,12 +1354,6 @@
   }
   if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
-    if (type_known_final != nullptr) {
-      *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
-    }
-    if (type_known_abstract != nullptr) {
-      *type_known_abstract = resolved_class->IsAbstract() && !resolved_class->IsArrayClass();
-    }
   } else {
     stats_->TypeNeedsAccessCheck();
   }
@@ -1380,12 +1361,9 @@
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                                            const DexFile& dex_file,
+                                                            Handle<mirror::DexCache> dex_cache,
                                                             uint32_t type_idx,
                                                             bool* finalizable) {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
   // Get type from dex cache assuming it was populated by the verifier.
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
@@ -1395,7 +1373,7 @@
     return false;  // Unknown class needs access checks.
   }
   *finalizable = resolved_class->IsFinalizable();
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
+  const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx);
   bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
   if (!is_accessible) {
     mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
@@ -1583,53 +1561,6 @@
   }
 }
 
-bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                                            bool is_put, MemberOffset* field_offset,
-                                            uint32_t* storage_index, bool* is_referrers_class,
-                                            bool* is_volatile, bool* is_initialized,
-                                            Primitive::Type* type) {
-  ScopedObjectAccess soa(Thread::Current());
-  // Try to resolve the field and compiling method's class.
-  ArtField* resolved_field;
-  mirror::Class* referrer_class;
-  Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache());
-  {
-    StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::ClassLoader> class_loader_handle(
-        hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
-    resolved_field =
-        ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, true);
-    referrer_class = resolved_field != nullptr
-        ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr;
-  }
-  bool result = false;
-  if (resolved_field != nullptr && referrer_class != nullptr) {
-    *is_volatile = IsFieldVolatile(resolved_field);
-    std::pair<bool, bool> fast_path = IsFastStaticField(
-        dex_cache.Get(), referrer_class, resolved_field, field_idx, storage_index);
-    result = is_put ? fast_path.second : fast_path.first;
-  }
-  if (result) {
-    *field_offset = GetFieldOffset(resolved_field);
-    *is_referrers_class = IsStaticFieldInReferrerClass(referrer_class, resolved_field);
-    // *is_referrers_class == true implies no worrying about class initialization.
-    *is_initialized = (*is_referrers_class) ||
-        (IsStaticFieldsClassInitialized(referrer_class, resolved_field) &&
-         CanAssumeTypeIsPresentInDexCache(*mUnit->GetDexFile(), *storage_index));
-    *type = resolved_field->GetTypeAsPrimitiveType();
-  } else {
-    // Conservative defaults.
-    *is_volatile = true;
-    *field_offset = MemberOffset(static_cast<size_t>(-1));
-    *storage_index = -1;
-    *is_referrers_class = false;
-    *is_initialized = false;
-    *type = Primitive::kPrimVoid;
-  }
-  ProcessedStaticField(result, *is_referrers_class);
-  return result;
-}
-
 void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type,
                                                    bool no_guarantee_of_dex_cache_entry,
                                                    const mirror::Class* referrer_class,
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 905f84d..d63dffa 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -161,11 +161,11 @@
   }
 
   // Generate the trampolines that are invoked by unresolved direct methods.
-  const std::vector<uint8_t>* CreateJniDlsymLookup() const;
-  const std::vector<uint8_t>* CreateQuickGenericJniTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const;
-  const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateJniDlsymLookup() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickGenericJniTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickImtConflictTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const;
+  std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const;
 
   CompiledClass* GetCompiledClass(ClassReference ref) const
       REQUIRES(!compiled_classes_lock_);
@@ -195,25 +195,26 @@
 
   // Callbacks from compiler to see what runtime checks must be generated.
 
-  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx);
+  bool CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
+                                        uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
       REQUIRES(!Locks::mutator_lock_);
 
   // Are runtime access checks necessary in the compiled code?
-  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                  uint32_t type_idx, bool* type_known_final = nullptr,
-                                  bool* type_known_abstract = nullptr,
-                                  bool* equals_referrers_class = nullptr)
-      REQUIRES(!Locks::mutator_lock_);
+  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
+                                  Handle<mirror::DexCache> dex_cache,
+                                  uint32_t type_idx)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
   // out_is_finalizable is set to whether the type is finalizable.
   bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                              const DexFile& dex_file,
+                                              Handle<mirror::DexCache> dex_cache,
                                               uint32_t type_idx,
                                               bool* out_is_finalizable)
-      REQUIRES(!Locks::mutator_lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
                           bool* is_type_initialized, bool* use_direct_type_ptr,
@@ -368,14 +369,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
 
-  // Can we fastpath static field access? Computes field's offset, volatility and whether the
-  // field is within the referrer (which can avoid checking class initialization).
-  bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
-                              MemberOffset* field_offset, uint32_t* storage_index,
-                              bool* is_referrers_class, bool* is_volatile, bool* is_initialized,
-                              Primitive::Type* type)
-      REQUIRES(!Locks::mutator_lock_);
-
   // Can we fastpath a interface, super class or virtual method call? Computes method's vtable
   // index.
   bool ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc,
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index cda2e27..c8dfc93 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -69,9 +69,9 @@
   DCHECK(jit_compiler != nullptr);
   if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) {
     const ArrayRef<mirror::Class*> types_array(types, count);
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses(
         kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array);
-    CreateJITCodeEntry(std::unique_ptr<const uint8_t[]>(elf_file.data()), elf_file.size());
+    CreateJITCodeEntry(std::move(elf_file));
   }
 }
 
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 8832c84..05c85e0 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -18,6 +18,7 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
 #include "cfi_test.h"
 #include "gtest/gtest.h"
 #include "jni/quick/calling_convention.h"
@@ -42,15 +43,19 @@
     const bool is_static = true;
     const bool is_synchronized = false;
     const char* shorty = "IIFII";
+
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+
     std::unique_ptr<JniCallingConvention> jni_conv(
-        JniCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-        ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+        ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
     const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
 
     // Assemble the method.
-    std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+    std::unique_ptr<Assembler> jni_asm(Assembler::Create(&arena, isa));
     jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index cef8c5d..e21f554 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -46,37 +46,51 @@
 
 // Managed runtime calling convention
 
-ManagedRuntimeCallingConvention* ManagedRuntimeCallingConvention::Create(
-    bool is_static, bool is_synchronized, const char* shorty, InstructionSet instruction_set) {
+std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention::Create(
+    ArenaAllocator* arena,
+    bool is_static,
+    bool is_synchronized,
+    const char* shorty,
+    InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm::ArmManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) arm64::Arm64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips::MipsManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) mips64::Mips64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86::X86ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<ManagedRuntimeCallingConvention>(
+          new (arena) x86_64::X86_64ManagedRuntimeCallingConvention(
+              is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
@@ -132,38 +146,46 @@
 
 // JNI calling convention
 
-JniCallingConvention* JniCallingConvention::Create(bool is_static, bool is_synchronized,
-                                                   const char* shorty,
-                                                   InstructionSet instruction_set) {
+std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocator* arena,
+                                                                   bool is_static,
+                                                                   bool is_synchronized,
+                                                                   const char* shorty,
+                                                                   InstructionSet instruction_set) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return new arm::ArmJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm::ArmJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsJniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips::MipsJniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86::X86JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty);
+      return std::unique_ptr<JniCallingConvention>(
+          new (arena) x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
-      return nullptr;
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 243d124..2c4b15c 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -18,6 +18,8 @@
 #define ART_COMPILER_JNI_QUICK_CALLING_CONVENTION_H_
 
 #include <vector>
+
+#include "base/arena_object.h"
 #include "handle_scope.h"
 #include "primitive.h"
 #include "thread.h"
@@ -26,7 +28,7 @@
 namespace art {
 
 // Top-level abstraction for different calling conventions.
-class CallingConvention {
+class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConvention> {
  public:
   bool IsReturnAReference() const { return shorty_[0] == 'L'; }
 
@@ -221,9 +223,11 @@
 // | { Method* }             | <-- SP
 class ManagedRuntimeCallingConvention : public CallingConvention {
  public:
-  static ManagedRuntimeCallingConvention* Create(bool is_static, bool is_synchronized,
-                                                 const char* shorty,
-                                                 InstructionSet instruction_set);
+  static std::unique_ptr<ManagedRuntimeCallingConvention> Create(ArenaAllocator* arena,
+                                                                 bool is_static,
+                                                                 bool is_synchronized,
+                                                                 const char* shorty,
+                                                                 InstructionSet instruction_set);
 
   // Register that holds the incoming method argument
   virtual ManagedRegister MethodRegister() = 0;
@@ -249,7 +253,9 @@
   virtual const ManagedRegisterEntrySpills& EntrySpills() = 0;
 
  protected:
-  ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty,
+  ManagedRuntimeCallingConvention(bool is_static,
+                                  bool is_synchronized,
+                                  const char* shorty,
                                   size_t frame_pointer_size)
       : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
 };
@@ -270,8 +276,11 @@
 // callee saves for frames above this one.
 class JniCallingConvention : public CallingConvention {
  public:
-  static JniCallingConvention* Create(bool is_static, bool is_synchronized, const char* shorty,
-                                      InstructionSet instruction_set);
+  static std::unique_ptr<JniCallingConvention> Create(ArenaAllocator* arena,
+                                                      bool is_static,
+                                                      bool is_synchronized,
+                                                      const char* shorty,
+                                                      InstructionSet instruction_set);
 
   // Size of frame excluding space for outgoing args (its assumed Method* is
   // always at the bottom of a frame, but this doesn't work for outgoing
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b8cda24..27714b8 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -22,6 +22,7 @@
 #include <fstream>
 
 #include "art_method.h"
+#include "base/arena_allocator.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "calling_convention.h"
@@ -69,13 +70,18 @@
   InstructionSet instruction_set = driver->GetInstructionSet();
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
   const bool is_64_bit_target = Is64BitInstructionSet(instruction_set);
+
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+
   // Calling conventions used to iterate over parameters to method
   std::unique_ptr<JniCallingConvention> main_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, instruction_set));
   bool reference_return = main_jni_conv->IsReturnAReference();
 
   std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
-      ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+      ManagedRuntimeCallingConvention::Create(
+          &arena, is_static, is_synchronized, shorty, instruction_set));
 
   // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
   //     method and the current thread.
@@ -90,11 +96,12 @@
     jni_end_shorty = "V";
   }
 
-  std::unique_ptr<JniCallingConvention> end_jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set));
+  std::unique_ptr<JniCallingConvention> end_jni_conv(JniCallingConvention::Create(
+      &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
 
   // Assembler that holds generated instructions
-  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set, instruction_set_features));
+  std::unique_ptr<Assembler> jni_asm(
+      Assembler::Create(&arena, instruction_set, instruction_set_features));
   jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
 
   // Offsets into data structures
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 582ecb3..fa49fc4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -79,7 +79,9 @@
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm::Thumb2Assembler assembler;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm::Thumb2Assembler assembler(&arena);
   assembler.LoadFromOffset(
       arm::kLoadWord, arm::PC, arm::R0,
       ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index e3e3121..b4ecbd8 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -247,7 +247,9 @@
 std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
   // The thunk just uses the entry point in the ArtMethod. This works even for calls
   // to the generic JNI and interpreter trampolines.
-  arm64::Arm64Assembler assembler;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  arm64::Arm64Assembler assembler(&arena);
   Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kArm64PointerSize).Int32Value());
   assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 25c671e..e804bee 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1407,7 +1407,7 @@
       offset = CompiledCode::AlignCode(offset, instruction_set); \
       adjusted_offset = offset + CompiledCode::CodeDelta(instruction_set); \
       oat_header_->Set ## fn_name ## Offset(adjusted_offset); \
-      field.reset(compiler_driver_->Create ## fn_name()); \
+      field = compiler_driver_->Create ## fn_name(); \
       offset += field->size();
 
     DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3049128..45d23fe 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -777,7 +777,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c978aaa..efe4c06 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -904,6 +904,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 185397c..12d1164 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -471,7 +471,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(&isa_features),
+      assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 246f5b7..56ac38e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -417,6 +417,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features) {
   // Save RA (containing the return address) to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(RA));
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 304cf08..1a4e62e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -795,13 +795,16 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
+      assembler_(graph->GetArena()),
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      constant_area_start_(-1),
+      fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_address_offset_(-1) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 056b69b..59cc444 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1007,6 +1007,7 @@
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
+        assembler_(graph->GetArena()),
         isa_features_(isa_features),
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 06b3968..f5e49c2 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -889,8 +889,15 @@
 }
 
 bool HInstructionBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
+
   bool finalizable;
-  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+  bool can_throw = NeedsAccessCheck(type_index, dex_cache, &finalizable);
 
   // Only the non-resolved entrypoint handles the finalizable class case. If we
   // need access checks, then we haven't resolved the method and the class may
@@ -899,16 +906,6 @@
       ? kQuickAllocObject
       : kQuickAllocObjectInitialized;
 
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<3> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
-
   if (outer_dex_cache.Get() != dex_cache.Get()) {
     // We currently do not support inlining allocations across dex files.
     return false;
@@ -921,7 +918,7 @@
       IsOutermostCompilingClass(type_index),
       dex_pc,
       /*needs_access_check*/ can_throw,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, type_index));
 
   AppendInstruction(load_class);
   HInstruction* cls = load_class;
@@ -979,13 +976,9 @@
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Thread* self = Thread::Current();
-  StackHandleScope<4> hs(self);
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(
-          self, outer_dex_file)));
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
   Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
@@ -1016,7 +1009,7 @@
         is_outer_class,
         dex_pc,
         /*needs_access_check*/ false,
-        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index));
     AppendInstruction(load_class);
     clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
     AppendInstruction(clinit_check);
@@ -1261,12 +1254,10 @@
 static mirror::Class* GetClassFrom(CompilerDriver* driver,
                                    const DexCompilationUnit& compilation_unit) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *compilation_unit.GetDexFile();
+  StackHandleScope<1> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(compilation_unit.GetClassLoader())));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      compilation_unit.GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
+  Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
 
   return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
 }
@@ -1281,10 +1272,8 @@
 
 bool HInstructionBuilder::IsOutermostCompilingClass(uint16_t type_index) const {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
@@ -1324,10 +1313,8 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
   ArtField* resolved_field = compiler_driver_->ResolveField(
@@ -1342,8 +1329,7 @@
 
   Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+  Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
 
   // The index at which the field's class is stored in the DexCache's type array.
@@ -1371,7 +1357,7 @@
   }
 
   bool is_in_cache =
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_cache, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
                                                  outer_dex_file,
@@ -1634,21 +1620,16 @@
                                          uint8_t reference,
                                          uint16_t type_index,
                                          uint32_t dex_pc) {
-  bool type_known_final, type_known_abstract, use_declaring_class;
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+
   bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
       dex_compilation_unit_->GetDexMethodIndex(),
-      *dex_compilation_unit_->GetDexFile(),
-      type_index,
-      &type_known_final,
-      &type_known_abstract,
-      &use_declaring_class);
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<2> hs(soa.Self());
-  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
-  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+      dex_cache,
+      type_index);
 
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
   HLoadClass* cls = new (arena_) HLoadClass(
@@ -1658,7 +1639,7 @@
       IsOutermostCompilingClass(type_index),
       dex_pc,
       !can_access,
-      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index));
   AppendInstruction(cls);
 
   TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class);
@@ -1676,9 +1657,17 @@
   }
 }
 
-bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index,
+                                           Handle<mirror::DexCache> dex_cache,
+                                           bool* finalizable) const {
   return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
+      dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable);
+}
+
+bool HInstructionBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
+  return NeedsAccessCheck(type_index, dex_cache, finalizable);
 }
 
 bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
@@ -2612,16 +2601,16 @@
 
     case Instruction::CONST_CLASS: {
       uint16_t type_index = instruction.VRegB_21c();
-      bool type_known_final;
-      bool type_known_abstract;
-      bool dont_use_is_referrers_class;
       // `CanAccessTypeWithoutChecks` will tell whether the method being
       // built is trying to access its own class, so that the generated
       // code can optimize for this case. However, the optimization does not
       // work for inlining, so we use `IsOutermostCompilingClass` instead.
+      ScopedObjectAccess soa(Thread::Current());
+      Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
       bool can_access = compiler_driver_->CanAccessTypeWithoutChecks(
-          dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index,
-          &type_known_final, &type_known_abstract, &dont_use_is_referrers_class);
+          dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index);
+      bool is_in_dex_cache =
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_cache, type_index);
       AppendInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
@@ -2629,7 +2618,7 @@
           IsOutermostCompilingClass(type_index),
           dex_pc,
           !can_access,
-          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
+          is_in_dex_cache));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index f480b70..070f7da 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -97,6 +97,10 @@
 
   // Returns whether the current method needs access check for the type.
   // Output parameter finalizable is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index,
+                        Handle<mirror::DexCache> dex_cache,
+                        /*out*/bool* finalizable) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
   bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
 
   template<typename T>
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 7a1e06b..5a0b89c 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -79,8 +79,15 @@
 
 void LICM::Run() {
   DCHECK(side_effects_.HasRun());
+
   // Only used during debug.
-  ArenaBitVector visited(graph_->GetArena(), graph_->GetBlocks().size(), false, kArenaAllocLICM);
+  ArenaBitVector* visited = nullptr;
+  if (kIsDebugBuild) {
+    visited = new (graph_->GetArena()) ArenaBitVector(graph_->GetArena(),
+                                                      graph_->GetBlocks().size(),
+                                                      false,
+                                                      kArenaAllocLICM);
+  }
 
   // Post order visit to visit inner loops before outer loops.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -109,10 +116,12 @@
       DCHECK(inner->IsInLoop());
       if (inner->GetLoopInformation() != loop_info) {
         // Thanks to post order visit, inner loops were already visited.
-        DCHECK(visited.IsBitSet(inner->GetBlockId()));
+        DCHECK(visited->IsBitSet(inner->GetBlockId()));
         continue;
       }
-      visited.SetBit(inner->GetBlockId());
+      if (kIsDebugBuild) {
+        visited->SetBit(inner->GetBlockId());
+      }
 
       if (contains_irreducible_loop) {
         // We cannot licm in an irreducible loop, or in a natural loop containing an
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index cad94c7..3670ce2 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -948,13 +948,11 @@
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    ArrayRef<const uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
         GetCompilerDriver()->GetInstructionSet(),
         GetCompilerDriver()->GetInstructionSetFeatures(),
         ArrayRef<const debug::MethodDebugInfo>(&info, 1));
-    CreateJITCodeEntryForAddress(code_address,
-                                 std::unique_ptr<const uint8_t[]>(elf_file.data()),
-                                 elf_file.size());
+    CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
   Runtime::Current()->GetJit()->AddMemoryUsage(method, arena.BytesUsed());
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97f2aee..719feec 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -969,6 +969,38 @@
     return false;
   }
 
+  bool IsLinearOrderWellFormed(const HGraph& graph) {
+    for (HBasicBlock* header : graph.GetBlocks()) {
+      if (!header->IsLoopHeader()) {
+        continue;
+      }
+
+      HLoopInformation* loop = header->GetLoopInformation();
+      size_t num_blocks = loop->GetBlocks().NumSetBits();
+      size_t found_blocks = 0u;
+
+      for (HLinearOrderIterator it(graph); !it.Done(); it.Advance()) {
+        HBasicBlock* current = it.Current();
+        if (loop->Contains(*current)) {
+          found_blocks++;
+          if (found_blocks == 1u && current != header) {
+            // First block is not the header.
+            return false;
+          } else if (found_blocks == num_blocks && !loop->IsBackEdge(*current)) {
+            // Last block is not a back edge.
+            return false;
+          }
+        } else if (found_blocks != 0u && found_blocks != num_blocks) {
+          // Blocks are not adjacent.
+          return false;
+        }
+      }
+      DCHECK_EQ(found_blocks, num_blocks);
+    }
+
+    return true;
+  }
+
   void AddBackEdgeUses(const HBasicBlock& block_at_use) {
     DCHECK(block_at_use.IsInLoop());
     // Add synthesized uses at the back edge of loops to help the register allocator.
@@ -995,12 +1027,30 @@
       if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
         // There was a use already seen in this loop. Therefore the previous call to `AddUse`
         // already inserted the backedge use. We can stop going outward.
-        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        if (kIsDebugBuild) {
+          if (!HasSynthesizeUseAt(back_edge_use_position)) {
+            // There exists a use prior to `back_edge_use_position` but there is
+            // no synthesized use at the back edge. This can happen in the presence
+            // of irreducible loops, when blocks of the loop are not adjacent in
+            // linear order, i.e. when there is an out-of-loop block between
+            // `block_at_use` and `back_edge_use_position` that uses this interval.
+            DCHECK(block_at_use.GetGraph()->HasIrreducibleLoops());
+            DCHECK(!IsLinearOrderWellFormed(*block_at_use.GetGraph()));
+          }
+        }
         break;
       }
 
-      DCHECK(last_in_new_list == nullptr
-             || back_edge_use_position > last_in_new_list->GetPosition());
+      if (last_in_new_list != nullptr &&
+          back_edge_use_position <= last_in_new_list->GetPosition()) {
+        // Loops are not properly nested in the linear order, i.e. the back edge
+        // of an outer loop precedes blocks of an inner loop. This can happen
+        // in the presence of irreducible loops.
+        DCHECK(block_at_use.GetGraph()->HasIrreducibleLoops());
+        DCHECK(!IsLinearOrderWellFormed(*block_at_use.GetGraph()));
+        // We must bail out, otherwise we would generate an unsorted use list.
+        break;
+      }
 
       UsePosition* new_use = new (allocator_) UsePosition(
           /* user */ nullptr,
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 48465e6..1ee1c4d 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -16,6 +16,7 @@
 
 #include "trampoline_compiler.h"
 
+#include "base/arena_allocator.h"
 #include "jni_env_ext.h"
 
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -48,9 +49,9 @@
 
 #ifdef ART_ENABLE_CODEGEN_arm
 namespace arm {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  Thumb2Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+  Thumb2Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (R0) in interpreter ABI.
@@ -68,19 +69,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm
 #endif  // ART_ENABLE_CODEGEN_arm
 
 #ifdef ART_ENABLE_CODEGEN_arm64
 namespace arm64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Arm64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+  Arm64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (X0) in interpreter ABI.
@@ -107,19 +108,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace arm64
 #endif  // ART_ENABLE_CODEGEN_arm64
 
 #ifdef ART_ENABLE_CODEGEN_mips
 namespace mips {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<4> offset) {
-  MipsAssembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<4> offset) {
+  MipsAssembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -139,19 +140,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips
 #endif  // ART_ENABLE_CODEGEN_mips
 
 #ifdef ART_ENABLE_CODEGEN_mips64
 namespace mips64 {
-static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi,
-                                                    ThreadOffset<8> offset) {
-  Mips64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(
+    ArenaAllocator* arena, EntryPointCallingConvention abi, ThreadOffset<8> offset) {
+  Mips64Assembler assembler(arena);
 
   switch (abi) {
     case kInterpreterAbi:  // Thread* is first argument (A0) in interpreter ABI.
@@ -171,18 +172,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace mips64
 #endif  // ART_ENABLE_CODEGEN_mips
 
 #ifdef ART_ENABLE_CODEGEN_x86
 namespace x86 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) {
-  X86Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset<4> offset) {
+  X86Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in fs.
   __ fs()->jmp(Address::Absolute(offset));
@@ -191,18 +193,19 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86
 #endif  // ART_ENABLE_CODEGEN_x86
 
 #ifdef ART_ENABLE_CODEGEN_x86_64
 namespace x86_64 {
-static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) {
-  x86_64::X86_64Assembler assembler;
+static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* arena,
+                                                                    ThreadOffset<8> offset) {
+  x86_64::X86_64Assembler assembler(arena);
 
   // All x86 trampolines call via the Thread* held in gs.
   __ gs()->jmp(x86_64::Address::Absolute(offset, true));
@@ -211,28 +214,31 @@
   __ FinalizeCode();
   size_t cs = __ CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
-  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  MemoryRegion code(entry_stub->data(), entry_stub->size());
   __ FinalizeInstructions(code);
 
-  return entry_stub.release();
+  return std::move(entry_stub);
 }
 }  // namespace x86_64
 #endif  // ART_ENABLE_CODEGEN_x86_64
 
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<8> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<8> offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return arm64::CreateTrampoline(abi, offset);
+      return arm64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return mips64::CreateTrampoline(abi, offset);
+      return mips64::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return x86_64::CreateTrampoline(offset);
+      return x86_64::CreateTrampoline(&arena, offset);
 #endif
     default:
       UNUSED(abi);
@@ -242,22 +248,25 @@
   }
 }
 
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi,
-                                               ThreadOffset<4> offset) {
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<4> offset) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
   switch (isa) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
-      return arm::CreateTrampoline(abi, offset);
+      return arm::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return mips::CreateTrampoline(abi, offset);
+      return mips::CreateTrampoline(&arena, abi, offset);
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       UNUSED(abi);
-      return x86::CreateTrampoline(offset);
+      return x86::CreateTrampoline(&arena, offset);
 #endif
     default:
       LOG(FATAL) << "Unexpected InstructionSet: " << isa;
diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h
index 66d5ac3..8f823f1 100644
--- a/compiler/trampolines/trampoline_compiler.h
+++ b/compiler/trampolines/trampoline_compiler.h
@@ -25,12 +25,12 @@
 namespace art {
 
 // Create code that will invoke the function held in thread local storage.
-const std::vector<uint8_t>* CreateTrampoline32(InstructionSet isa,
-                                               EntryPointCallingConvention abi,
-                                               ThreadOffset<4> entry_point_offset);
-const std::vector<uint8_t>* CreateTrampoline64(InstructionSet isa,
-                                               EntryPointCallingConvention abi,
-                                               ThreadOffset<8> entry_point_offset);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<4> entry_point_offset);
+std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa,
+                                                               EntryPointCallingConvention abi,
+                                                               ThreadOffset<8> entry_point_offset);
 
 }  // namespace art
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index dead8fd..e5f91dc 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -845,7 +845,7 @@
 
 void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch, stack_adjust);
+  ArmExceptionSlowPath* slow = new (GetArena()) ArmExceptionSlowPath(scratch, stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  TR, Thread::ExceptionOffset<4>().Int32Value());
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index a894565..ffbe786 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -20,6 +20,8 @@
 #include <type_traits>
 #include <vector>
 
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -1078,6 +1080,9 @@
   }
 
  protected:
+  explicit ArmAssembler(ArenaAllocator* arena)
+      : Assembler(arena), tracked_labels_(arena->Adapter(kArenaAllocAssembler)) {}
+
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
@@ -1086,7 +1091,7 @@
   void FinalizeTrackedLabels();
 
   // Tracked labels. Use a vector, as we need to sort before adjusting.
-  std::vector<Label*> tracked_labels_;
+  ArenaVector<Label*> tracked_labels_;
 };
 
 // Slowpath entered when Thread::Current()->_exception is non-null
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index e3e05ca..bc6020e 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -30,8 +30,7 @@
 
 class Arm32Assembler FINAL : public ArmAssembler {
  public:
-  Arm32Assembler() {
-  }
+  explicit Arm32Assembler(ArenaAllocator* arena) : ArmAssembler(arena) {}
   virtual ~Arm32Assembler() {}
 
   bool IsThumb() const OVERRIDE {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 15298b3..26f7d0d 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -59,8 +59,8 @@
     return;
   }
   // Create and fill in the fixup_dependents_.
-  assembler->fixup_dependents_.reset(new FixupId[number_of_dependents]);
-  FixupId* dependents = assembler->fixup_dependents_.get();
+  assembler->fixup_dependents_.resize(number_of_dependents);
+  FixupId* dependents = assembler->fixup_dependents_.data();
   for (FixupId fixup_id = 0u; fixup_id != end_id; ++fixup_id) {
     uint32_t target = fixups[fixup_id].target_;
     if (target > fixups[fixup_id].location_) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 6b61aca..111a6b0 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
@@ -33,14 +34,16 @@
 
 class Thumb2Assembler FINAL : public ArmAssembler {
  public:
-  explicit Thumb2Assembler(bool can_relocate_branches = true)
-      : can_relocate_branches_(can_relocate_branches),
+  explicit Thumb2Assembler(ArenaAllocator* arena, bool can_relocate_branches = true)
+      : ArmAssembler(arena),
+        can_relocate_branches_(can_relocate_branches),
         force_32bit_(false),
         it_cond_index_(kNoItCondition),
         next_condition_(AL),
-        fixups_(),
-        fixup_dependents_(),
-        literals_(),
+        fixups_(arena->Adapter(kArenaAllocAssembler)),
+        fixup_dependents_(arena->Adapter(kArenaAllocAssembler)),
+        literals_(arena->Adapter(kArenaAllocAssembler)),
+        jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0u),
         last_old_position_(0u),
         last_fixup_id_(0u) {
@@ -558,9 +561,9 @@
     // Prepare the assembler->fixup_dependents_ and each Fixup's dependents_start_/count_.
     static void PrepareDependents(Thumb2Assembler* assembler);
 
-    ArrayRef<FixupId> Dependents(const Thumb2Assembler& assembler) const {
-      return ArrayRef<FixupId>(assembler.fixup_dependents_.get() + dependents_start_,
-                               dependents_count_);
+    ArrayRef<const FixupId> Dependents(const Thumb2Assembler& assembler) const {
+      return ArrayRef<const FixupId>(assembler.fixup_dependents_).SubArray(dependents_start_,
+                                                                           dependents_count_);
     }
 
     // Resolve a branch when the target is known.
@@ -839,15 +842,15 @@
   static int16_t AdrEncoding16(Register rd, int32_t offset);
   static int32_t AdrEncoding32(Register rd, int32_t offset);
 
-  std::vector<Fixup> fixups_;
-  std::unique_ptr<FixupId[]> fixup_dependents_;
+  ArenaVector<Fixup> fixups_;
+  ArenaVector<FixupId> fixup_dependents_;
 
   // Use std::deque<> for literal labels to allow insertions at the end
   // without invalidating pointers and references to existing elements.
-  std::deque<Literal> literals_;
+  ArenaDeque<Literal> literals_;
 
   // Jump table list.
-  std::deque<JumpTable> jump_tables_;
+  ArenaDeque<JumpTable> jump_tables_;
 
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 0e17512..eb851f9 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -32,10 +32,8 @@
 #endif
 
 void Arm64Assembler::FinalizeCode() {
-  if (!exception_blocks_.empty()) {
-    for (size_t i = 0; i < exception_blocks_.size(); i++) {
-      EmitExceptionPoll(exception_blocks_.at(i));
-    }
+  for (Arm64Exception* exception : exception_blocks_) {
+    EmitExceptionPoll(exception);
   }
   ___ FinalizeCode();
 }
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 7b25b8f..03ae996 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -21,6 +21,7 @@
 #include <memory>
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/logging.h"
 #include "constants_arm64.h"
 #include "utils/arm64/managed_register_arm64.h"
@@ -67,7 +68,10 @@
  public:
   // We indicate the size of the initial code generation buffer to the VIXL
   // assembler. From there we it will automatically manage the buffer.
-  Arm64Assembler() : vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
+  explicit Arm64Assembler(ArenaAllocator* arena)
+      : Assembler(arena),
+        exception_blocks_(arena->Adapter(kArenaAllocAssembler)),
+        vixl_masm_(new vixl::MacroAssembler(kArm64BaseBufferSize)) {}
 
   virtual ~Arm64Assembler() {
     delete vixl_masm_;
@@ -249,7 +253,7 @@
   void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
 
   // List of exception blocks to generate at the end of the code cache.
-  std::vector<Arm64Exception*> exception_blocks_;
+  ArenaVector<Arm64Exception*> exception_blocks_;
 
  public:
   // Vixl assembler.
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index f784d2c..c2aa574 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -44,14 +44,10 @@
 
 namespace art {
 
-static uint8_t* NewContents(size_t capacity) {
-  return new uint8_t[capacity];
-}
-
-
-AssemblerBuffer::AssemblerBuffer() {
+AssemblerBuffer::AssemblerBuffer(ArenaAllocator* arena)
+    : arena_(arena) {
   static const size_t kInitialBufferCapacity = 4 * KB;
-  contents_ = NewContents(kInitialBufferCapacity);
+  contents_ = arena_->AllocArray<uint8_t>(kInitialBufferCapacity);
   cursor_ = contents_;
   limit_ = ComputeLimit(contents_, kInitialBufferCapacity);
   fixup_ = nullptr;
@@ -68,7 +64,9 @@
 
 
 AssemblerBuffer::~AssemblerBuffer() {
-  delete[] contents_;
+  if (arena_->IsRunningOnMemoryTool()) {
+    arena_->MakeInaccessible(contents_, Capacity());
+  }
 }
 
 
@@ -100,19 +98,12 @@
   new_capacity = std::max(new_capacity, min_capacity);
 
   // Allocate the new data area and copy contents of the old one to it.
-  uint8_t* new_contents = NewContents(new_capacity);
-  memmove(reinterpret_cast<void*>(new_contents),
-          reinterpret_cast<void*>(contents_),
-          old_size);
-
-  // Compute the relocation delta and switch to the new contents area.
-  ptrdiff_t delta = new_contents - contents_;
-  delete[] contents_;
-  contents_ = new_contents;
+  contents_ = reinterpret_cast<uint8_t*>(
+      arena_->Realloc(contents_, old_capacity, new_capacity, kArenaAllocAssembler));
 
   // Update the cursor and recompute the limit.
-  cursor_ += delta;
-  limit_ = ComputeLimit(new_contents, new_capacity);
+  cursor_ = contents_ + old_size;
+  limit_ = ComputeLimit(contents_, new_capacity);
 
   // Verify internal state.
   CHECK_EQ(Capacity(), new_capacity);
@@ -129,36 +120,40 @@
   }
 }
 
-Assembler* Assembler::Create(InstructionSet instruction_set,
-                             const InstructionSetFeatures* instruction_set_features) {
+std::unique_ptr<Assembler> Assembler::Create(
+    ArenaAllocator* arena,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features) {
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
-      return new arm::Arm32Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm::Arm32Assembler(arena));
     case kThumb2:
-      return new arm::Thumb2Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm::Thumb2Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
-      return new arm64::Arm64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) arm64::Arm64Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
-      return new mips::MipsAssembler(instruction_set_features != nullptr
-                                         ? instruction_set_features->AsMipsInstructionSetFeatures()
-                                         : nullptr);
+      return std::unique_ptr<Assembler>(new (arena) mips::MipsAssembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMipsInstructionSetFeatures()
+              : nullptr));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return new mips64::Mips64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) mips64::Mips64Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
-      return new x86::X86Assembler();
+      return std::unique_ptr<Assembler>(new (arena) x86::X86Assembler(arena));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
-      return new x86_64::X86_64Assembler();
+      return std::unique_ptr<Assembler>(new (arena) x86_64::X86_64Assembler(arena));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 414ea7e..4ea85a2 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -22,6 +22,8 @@
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
 #include "arm/constants_arm.h"
+#include "base/arena_allocator.h"
+#include "base/arena_object.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "debug/dwarf/debug_frame_opcode_writer.h"
@@ -60,7 +62,7 @@
 };
 
 // Parent of all queued slow paths, emitted during finalization
-class SlowPath {
+class SlowPath : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
   SlowPath() : next_(nullptr) {}
   virtual ~SlowPath() {}
@@ -85,9 +87,13 @@
 
 class AssemblerBuffer {
  public:
-  AssemblerBuffer();
+  explicit AssemblerBuffer(ArenaAllocator* arena);
   ~AssemblerBuffer();
 
+  ArenaAllocator* GetArena() {
+    return arena_;
+  }
+
   // Basic support for emitting, loading, and storing.
   template<typename T> void Emit(T value) {
     CHECK(HasEnsuredCapacity());
@@ -235,6 +241,7 @@
   // for a single, fast space check per instruction.
   static const int kMinimumGap = 32;
 
+  ArenaAllocator* arena_;
   uint8_t* contents_;
   uint8_t* cursor_;
   uint8_t* limit_;
@@ -338,10 +345,12 @@
   std::vector<DelayedAdvancePC> delayed_advance_pcs_;
 };
 
-class Assembler {
+class Assembler : public DeletableArenaObject<kArenaAllocAssembler> {
  public:
-  static Assembler* Create(InstructionSet instruction_set,
-                           const InstructionSetFeatures* instruction_set_features = nullptr);
+  static std::unique_ptr<Assembler> Create(
+      ArenaAllocator* arena,
+      InstructionSet instruction_set,
+      const InstructionSetFeatures* instruction_set_features = nullptr);
 
   // Finalize the code; emit slow paths, fixup branches, add literal pool, etc.
   virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); }
@@ -504,7 +513,11 @@
   DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
 
  protected:
-  Assembler() : buffer_(), cfi_(this) {}
+  explicit Assembler(ArenaAllocator* arena) : buffer_(arena), cfi_(this) {}
+
+  ArenaAllocator* GetArena() {
+    return buffer_.GetArena();
+  }
 
   AssemblerBuffer buffer_;
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 2579ddb..084e901 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -460,7 +460,8 @@
   explicit AssemblerTest() {}
 
   void SetUp() OVERRIDE {
-    assembler_.reset(new Ass());
+    arena_.reset(new ArenaAllocator(&pool_));
+    assembler_.reset(new (arena_.get()) Ass(arena_.get()));
     test_helper_.reset(
         new AssemblerTestInfrastructure(GetArchitectureString(),
                                         GetAssemblerCmdName(),
@@ -476,6 +477,8 @@
 
   void TearDown() OVERRIDE {
     test_helper_.reset();  // Clean up the helper.
+    assembler_.reset();
+    arena_.reset();
   }
 
   // Override this to set up any architecture-specific things, e.g., register vectors.
@@ -919,6 +922,8 @@
 
   static constexpr size_t kWarnManyCombinationsThreshold = 500;
 
+  ArenaPool pool_;
+  std::unique_ptr<ArenaAllocator> arena_;
   std::unique_ptr<Ass> assembler_;
   std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2df9b17..c67cb5a 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -195,11 +195,18 @@
 
 #undef __
 
+class Thumb2AssemblerTest : public ::testing::Test {
+ public:
+  Thumb2AssemblerTest() : pool(), arena(&pool), assembler(&arena) { }
+
+  ArenaPool pool;
+  ArenaAllocator arena;
+  arm::Thumb2Assembler assembler;
+};
+
 #define __ assembler.
 
-TEST(Thumb2AssemblerTest, SimpleMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMov) {
   __ movs(R0, ShifterOperand(R1));
   __ mov(R0, ShifterOperand(R1));
   __ mov(R8, ShifterOperand(R9));
@@ -210,8 +217,7 @@
   EmitAndCheck(&assembler, "SimpleMov");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMov32) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, SimpleMov32) {
   __ Force32Bit();
 
   __ mov(R0, ShifterOperand(R1));
@@ -220,9 +226,7 @@
   EmitAndCheck(&assembler, "SimpleMov32");
 }
 
-TEST(Thumb2AssemblerTest, SimpleMovAdd) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleMovAdd) {
   __ mov(R0, ShifterOperand(R1));
   __ adds(R0, R1, ShifterOperand(R2));
   __ add(R0, R1, ShifterOperand(0));
@@ -230,9 +234,7 @@
   EmitAndCheck(&assembler, "SimpleMovAdd");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingRegister) {
   // 32 bit variants using low registers.
   __ mvn(R0, ShifterOperand(R1), AL, kCcKeep);
   __ add(R0, R1, ShifterOperand(R2), AL, kCcKeep);
@@ -364,9 +366,7 @@
   EmitAndCheck(&assembler, "DataProcessingRegister");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingImmediate) {
   __ mov(R0, ShifterOperand(0x55));
   __ mvn(R0, ShifterOperand(0x55));
   __ add(R0, R1, ShifterOperand(0x55));
@@ -397,9 +397,7 @@
   EmitAndCheck(&assembler, "DataProcessingImmediate");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediate) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mvn(R0, ShifterOperand(0x550055));
   __ add(R0, R1, ShifterOperand(0x550055));
@@ -422,9 +420,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingModifiedImmediates) {
   __ mov(R0, ShifterOperand(0x550055));
   __ mov(R0, ShifterOperand(0x55005500));
   __ mov(R0, ShifterOperand(0x55555555));
@@ -436,9 +432,7 @@
   EmitAndCheck(&assembler, "DataProcessingModifiedImmediates");
 }
 
-TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, DataProcessingShiftedRegister) {
   // 16-bit variants.
   __ movs(R3, ShifterOperand(R4, LSL, 4));
   __ movs(R3, ShifterOperand(R4, LSR, 5));
@@ -467,10 +461,9 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
-TEST(Thumb2AssemblerTest, ShiftImmediate) {
+TEST_F(Thumb2AssemblerTest, ShiftImmediate) {
   // Note: This test produces the same results as DataProcessingShiftedRegister
   // but it does so using shift functions instead of mov().
-  arm::Thumb2Assembler assembler;
 
   // 16-bit variants.
   __ Lsl(R3, R4, 4);
@@ -500,9 +493,7 @@
   EmitAndCheck(&assembler, "ShiftImmediate");
 }
 
-TEST(Thumb2AssemblerTest, BasicLoad) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicLoad) {
   __ ldr(R3, Address(R4, 24));
   __ ldrb(R3, Address(R4, 24));
   __ ldrh(R3, Address(R4, 24));
@@ -522,9 +513,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicStore) {
   __ str(R3, Address(R4, 24));
   __ strb(R3, Address(R4, 24));
   __ strh(R3, Address(R4, 24));
@@ -539,9 +528,7 @@
   EmitAndCheck(&assembler, "BasicStore");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoad) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoad) {
   __ ldr(R3, Address(R4, 24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -581,9 +568,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, ComplexStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexStore) {
   __ str(R3, Address(R4, 24, Address::Mode::Offset));
   __ str(R3, Address(R4, 24, Address::Mode::PreIndex));
   __ str(R3, Address(R4, 24, Address::Mode::PostIndex));
@@ -608,9 +593,7 @@
   EmitAndCheck(&assembler, "ComplexStore");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStore) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStore) {
   __ ldr(R3, Address(R4, -24, Address::Mode::Offset));
   __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex));
   __ ldr(R3, Address(R4, -24, Address::Mode::PostIndex));
@@ -670,18 +653,14 @@
   EmitAndCheck(&assembler, "NegativeLoadStore");
 }
 
-TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ ldrd(R2, Address(R0, 24, Address::Mode::Offset));
 
   EmitAndCheck(&assembler, "SimpleLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, ComplexLoadStoreDual) {
   __ strd(R2, Address(R0, 24, Address::Mode::Offset));
   __ strd(R2, Address(R0, 24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, 24, Address::Mode::PostIndex));
@@ -699,9 +678,7 @@
   EmitAndCheck(&assembler, "ComplexLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, NegativeLoadStoreDual) {
   __ strd(R2, Address(R0, -24, Address::Mode::Offset));
   __ strd(R2, Address(R0, -24, Address::Mode::PreIndex));
   __ strd(R2, Address(R0, -24, Address::Mode::PostIndex));
@@ -719,9 +696,7 @@
   EmitAndCheck(&assembler, "NegativeLoadStoreDual");
 }
 
-TEST(Thumb2AssemblerTest, SimpleBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SimpleBranch) {
   Label l1;
   __ mov(R0, ShifterOperand(2));
   __ Bind(&l1);
@@ -757,8 +732,7 @@
   EmitAndCheck(&assembler, "SimpleBranch");
 }
 
-TEST(Thumb2AssemblerTest, LongBranch) {
-  arm::Thumb2Assembler assembler;
+TEST_F(Thumb2AssemblerTest, LongBranch) {
   __ Force32Bit();
   // 32 bit branches.
   Label l1;
@@ -797,9 +771,7 @@
   EmitAndCheck(&assembler, "LongBranch");
 }
 
-TEST(Thumb2AssemblerTest, LoadMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadMultiple) {
   // 16 bit.
   __ ldm(DB_W, R4, (1 << R0 | 1 << R3));
 
@@ -813,9 +785,7 @@
   EmitAndCheck(&assembler, "LoadMultiple");
 }
 
-TEST(Thumb2AssemblerTest, StoreMultiple) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StoreMultiple) {
   // 16 bit.
   __ stm(IA_W, R4, (1 << R0 | 1 << R3));
 
@@ -830,9 +800,7 @@
   EmitAndCheck(&assembler, "StoreMultiple");
 }
 
-TEST(Thumb2AssemblerTest, MovWMovT) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, MovWMovT) {
   // Always 32 bit.
   __ movw(R4, 0);
   __ movw(R4, 0x34);
@@ -848,9 +816,7 @@
   EmitAndCheck(&assembler, "MovWMovT");
 }
 
-TEST(Thumb2AssemblerTest, SpecialAddSub) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, SpecialAddSub) {
   __ add(R2, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(SP, SP, ShifterOperand(0x50));   // 16 bit.
   __ add(R8, SP, ShifterOperand(0x50));   // 32 bit.
@@ -869,9 +835,7 @@
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
-TEST(Thumb2AssemblerTest, LoadFromOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadFromOffset) {
   __ LoadFromOffset(kLoadWord, R2, R4, 12);
   __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
   __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
@@ -901,9 +865,7 @@
   EmitAndCheck(&assembler, "LoadFromOffset");
 }
 
-TEST(Thumb2AssemblerTest, StoreToOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StoreToOffset) {
   __ StoreToOffset(kStoreWord, R2, R4, 12);
   __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
   __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
@@ -931,9 +893,7 @@
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-TEST(Thumb2AssemblerTest, IfThen) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, IfThen) {
   __ it(EQ);
   __ mov(R1, ShifterOperand(1), EQ);
 
@@ -964,9 +924,7 @@
   EmitAndCheck(&assembler, "IfThen");
 }
 
-TEST(Thumb2AssemblerTest, CbzCbnz) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CbzCbnz) {
   Label l1;
   __ cbz(R2, &l1);
   __ mov(R1, ShifterOperand(3));
@@ -984,9 +942,7 @@
   EmitAndCheck(&assembler, "CbzCbnz");
 }
 
-TEST(Thumb2AssemblerTest, Multiply) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Multiply) {
   __ mul(R0, R1, R0);
   __ mul(R0, R1, R2);
   __ mul(R8, R9, R8);
@@ -1004,9 +960,7 @@
   EmitAndCheck(&assembler, "Multiply");
 }
 
-TEST(Thumb2AssemblerTest, Divide) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Divide) {
   __ sdiv(R0, R1, R2);
   __ sdiv(R8, R9, R10);
 
@@ -1016,9 +970,7 @@
   EmitAndCheck(&assembler, "Divide");
 }
 
-TEST(Thumb2AssemblerTest, VMov) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VMov) {
   __ vmovs(S1, 1.0);
   __ vmovd(D1, 1.0);
 
@@ -1029,9 +981,7 @@
 }
 
 
-TEST(Thumb2AssemblerTest, BasicFloatingPoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, BasicFloatingPoint) {
   __ vadds(S0, S1, S2);
   __ vsubs(S0, S1, S2);
   __ vmuls(S0, S1, S2);
@@ -1055,9 +1005,7 @@
   EmitAndCheck(&assembler, "BasicFloatingPoint");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointConversions) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointConversions) {
   __ vcvtsd(S2, D2);
   __ vcvtds(D2, S2);
 
@@ -1076,9 +1024,7 @@
   EmitAndCheck(&assembler, "FloatingPointConversions");
 }
 
-TEST(Thumb2AssemblerTest, FloatingPointComparisons) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, FloatingPointComparisons) {
   __ vcmps(S0, S1);
   __ vcmpd(D0, D1);
 
@@ -1088,35 +1034,27 @@
   EmitAndCheck(&assembler, "FloatingPointComparisons");
 }
 
-TEST(Thumb2AssemblerTest, Calls) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Calls) {
   __ blx(LR);
   __ bx(LR);
 
   EmitAndCheck(&assembler, "Calls");
 }
 
-TEST(Thumb2AssemblerTest, Breakpoint) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Breakpoint) {
   __ bkpt(0);
 
   EmitAndCheck(&assembler, "Breakpoint");
 }
 
-TEST(Thumb2AssemblerTest, StrR1) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, StrR1) {
   __ str(R1, Address(SP, 68));
   __ str(R1, Address(SP, 1068));
 
   EmitAndCheck(&assembler, "StrR1");
 }
 
-TEST(Thumb2AssemblerTest, VPushPop) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, VPushPop) {
   __ vpushs(S2, 4);
   __ vpushd(D2, 4);
 
@@ -1126,9 +1064,7 @@
   EmitAndCheck(&assembler, "VPushPop");
 }
 
-TEST(Thumb2AssemblerTest, Max16BitBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Max16BitBranch) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) ; i += 2) {
@@ -1140,9 +1076,7 @@
   EmitAndCheck(&assembler, "Max16BitBranch");
 }
 
-TEST(Thumb2AssemblerTest, Branch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Branch32) {
   Label l1;
   __ b(&l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1154,9 +1088,7 @@
   EmitAndCheck(&assembler, "Branch32");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchMax) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchMax) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) ; i += 2) {
@@ -1168,9 +1100,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchMax");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation16) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 7) + 2 ; i += 2) {
@@ -1182,9 +1112,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation16");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranchRelocation32) {
   Label l1;
   __ cbz(R4, &l1);
   for (int i = 0 ; i < (1 << 11) + 2 ; i += 2) {
@@ -1196,9 +1124,7 @@
   EmitAndCheck(&assembler, "CompareAndBranchRelocation32");
 }
 
-TEST(Thumb2AssemblerTest, MixedBranch32) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, MixedBranch32) {
   Label l1;
   Label l2;
   __ b(&l1);      // Forwards.
@@ -1215,9 +1141,7 @@
   EmitAndCheck(&assembler, "MixedBranch32");
 }
 
-TEST(Thumb2AssemblerTest, Shifts) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, Shifts) {
   // 16 bit selected for CcDontCare.
   __ Lsl(R0, R1, 5);
   __ Lsr(R0, R1, 5);
@@ -1292,9 +1216,7 @@
   EmitAndCheck(&assembler, "Shifts");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreRegOffset) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreRegOffset) {
   // 16 bit.
   __ ldr(R0, Address(R1, R2));
   __ str(R0, Address(R1, R2));
@@ -1319,9 +1241,7 @@
   EmitAndCheck(&assembler, "LoadStoreRegOffset");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLiteral) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLiteral) {
   __ ldr(R0, Address(4));
   __ str(R0, Address(4));
 
@@ -1337,9 +1257,7 @@
   EmitAndCheck(&assembler, "LoadStoreLiteral");
 }
 
-TEST(Thumb2AssemblerTest, LoadStoreLimits) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, LoadStoreLimits) {
   __ ldr(R0, Address(R4, 124));     // 16 bit.
   __ ldr(R0, Address(R4, 128));     // 32 bit.
 
@@ -1367,9 +1285,7 @@
   EmitAndCheck(&assembler, "LoadStoreLimits");
 }
 
-TEST(Thumb2AssemblerTest, CompareAndBranch) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CompareAndBranch) {
   Label label;
   __ CompareAndBranchIfZero(arm::R0, &label);
   __ CompareAndBranchIfZero(arm::R11, &label);
@@ -1380,9 +1296,7 @@
   EmitAndCheck(&assembler, "CompareAndBranch");
 }
 
-TEST(Thumb2AssemblerTest, AddConstant) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, AddConstant) {
   // Low registers, Rd != Rn.
   __ AddConstant(R0, R1, 0);                          // MOV.
   __ AddConstant(R0, R1, 1);                          // 16-bit ADDS, encoding T1.
@@ -1626,9 +1540,7 @@
   EmitAndCheck(&assembler, "AddConstant");
 }
 
-TEST(Thumb2AssemblerTest, CmpConstant) {
-  arm::Thumb2Assembler assembler;
-
+TEST_F(Thumb2AssemblerTest, CmpConstant) {
   __ CmpConstant(R0, 0);                              // 16-bit CMP.
   __ CmpConstant(R1, 1);                              // 16-bit CMP.
   __ CmpConstant(R0, 7);                              // 16-bit CMP.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index ffac4c4..ecb67bd 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -102,8 +102,10 @@
 
 class MipsAssembler FINAL : public Assembler {
  public:
-  explicit MipsAssembler(const MipsInstructionSetFeatures* instruction_set_features = nullptr)
-      : overwriting_(false),
+  explicit MipsAssembler(ArenaAllocator* arena,
+                         const MipsInstructionSetFeatures* instruction_set_features = nullptr)
+      : Assembler(arena),
+        overwriting_(false),
         overwrite_location_(0),
         last_position_adjustment_(0),
         last_old_position_(0),
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 71f5e00..8acc38a 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -102,8 +102,9 @@
 
 class Mips64Assembler FINAL : public Assembler {
  public:
-  Mips64Assembler()
-      : overwriting_(false),
+  explicit Mips64Assembler(ArenaAllocator* arena)
+      : Assembler(arena),
+        overwriting_(false),
         overwrite_location_(0),
         last_position_adjustment_(0),
         last_old_position_(0),
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 3efef70..2203646 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -2379,7 +2379,7 @@
 }
 
 void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath(stack_adjust);
+  X86ExceptionSlowPath* slow = new (GetArena()) X86ExceptionSlowPath(stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<4>()), Immediate(0));
   j(kNotEqual, slow->Entry());
@@ -2402,7 +2402,7 @@
 }
 
 void X86Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   // Generate the data for the literal area.
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 00ff7bd..8567ad2 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -18,12 +18,15 @@
 #define ART_COMPILER_UTILS_X86_ASSEMBLER_X86_H_
 
 #include <vector>
+
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "constants_x86.h"
 #include "globals.h"
 #include "managed_register_x86.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -260,7 +263,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -290,18 +293,18 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
 
 class X86Assembler FINAL : public Assembler {
  public:
-  X86Assembler() {}
+  explicit X86Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86Assembler() {}
 
   /*
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index d0d5147..1d1df6e 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -16,13 +16,16 @@
 
 #include "assembler_x86.h"
 
+#include "base/arena_allocator.h"
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
 namespace art {
 
 TEST(AssemblerX86, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d86ad1b..32eb4a3 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -3144,7 +3144,7 @@
 };
 
 void X86_64Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
-  X86_64ExceptionSlowPath* slow = new X86_64ExceptionSlowPath(stack_adjust);
+  X86_64ExceptionSlowPath* slow = new (GetArena()) X86_64ExceptionSlowPath(stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<8>(), true), Immediate(0));
   j(kNotEqual, slow->Entry());
@@ -3167,7 +3167,7 @@
 }
 
 void X86_64Assembler::AddConstantArea() {
-  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
   for (size_t i = 0, e = area.size(); i < e; i++) {
     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
     EmitInt32(area[i]);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index f00cb12..92c7d0a 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -19,12 +19,14 @@
 
 #include <vector>
 
+#include "base/arena_containers.h"
 #include "base/bit_utils.h"
 #include "base/macros.h"
 #include "constants_x86_64.h"
 #include "globals.h"
 #include "managed_register_x86_64.h"
 #include "offsets.h"
+#include "utils/array_ref.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -270,7 +272,7 @@
  */
 class ConstantArea {
  public:
-  ConstantArea() {}
+  explicit ConstantArea(ArenaAllocator* arena) : buffer_(arena->Adapter(kArenaAllocAssembler)) {}
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
@@ -296,13 +298,13 @@
     return buffer_.size() * elem_size_;
   }
 
-  const std::vector<int32_t>& GetBuffer() const {
-    return buffer_;
+  ArrayRef<const int32_t> GetBuffer() const {
+    return ArrayRef<const int32_t>(buffer_);
   }
 
  private:
   static constexpr size_t elem_size_ = sizeof(int32_t);
-  std::vector<int32_t> buffer_;
+  ArenaVector<int32_t> buffer_;
 };
 
 
@@ -332,7 +334,7 @@
 
 class X86_64Assembler FINAL : public Assembler {
  public:
-  X86_64Assembler() {}
+  explicit X86_64Assembler(ArenaAllocator* arena) : Assembler(arena), constant_area_(arena) {}
   virtual ~X86_64Assembler() {}
 
   /*
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4f65709..b19e616 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -27,7 +27,9 @@
 namespace art {
 
 TEST(AssemblerX86_64, CreateBuffer) {
-  AssemblerBuffer buffer;
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  AssemblerBuffer buffer(&arena);
   AssemblerBuffer::EnsureCapacity ensured(&buffer);
   buffer.Emit<uint8_t>(0x42);
   ASSERT_EQ(static_cast<size_t>(1), buffer.Size());
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index cbd0c40..214222d 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -51,11 +51,13 @@
   explicit ImgDiagDumper(std::ostream* os,
                          const ImageHeader& image_header,
                          const std::string& image_location,
-                         pid_t image_diff_pid)
+                         pid_t image_diff_pid,
+                         pid_t zygote_diff_pid)
       : os_(os),
         image_header_(image_header),
         image_location_(image_location),
-        image_diff_pid_(image_diff_pid) {}
+        image_diff_pid_(image_diff_pid),
+        zygote_diff_pid_(zygote_diff_pid) {}
 
   bool Dump() SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
@@ -68,7 +70,7 @@
     bool ret = true;
     if (image_diff_pid_ >= 0) {
       os << "IMAGE DIFF PID (" << image_diff_pid_ << "): ";
-      ret = DumpImageDiff(image_diff_pid_);
+      ret = DumpImageDiff(image_diff_pid_, zygote_diff_pid_);
       os << "\n\n";
     } else {
       os << "IMAGE DIFF PID: disabled\n\n";
@@ -95,7 +97,8 @@
     return str.substr(idx + 1);
   }
 
-  bool DumpImageDiff(pid_t image_diff_pid) SHARED_REQUIRES(Locks::mutator_lock_) {
+  bool DumpImageDiff(pid_t image_diff_pid, pid_t zygote_diff_pid)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
 
     {
@@ -138,7 +141,7 @@
     }
 
     // Future idea: diff against zygote so we can ignore the shared dirty pages.
-    return DumpImageDiffMap(image_diff_pid, boot_map);
+    return DumpImageDiffMap(image_diff_pid, zygote_diff_pid, boot_map);
   }
 
   static std::string PrettyFieldValue(ArtField* field, mirror::Object* obj)
@@ -212,8 +215,74 @@
     std::vector<mirror::Object*> dirty_objects;
   };
 
+  void DiffObjectContents(mirror::Object* obj,  // Object printed below as "original".
+                          uint8_t* remote_bytes,  // Bytes of the copy printed as "remote".
+                          std::ostream& os) SHARED_REQUIRES(Locks::mutator_lock_) {
+    const char* tabs = "    ";  // Indentation prefix for the report lines.
+    // Attempt to find fields for all dirty bytes (bytes where obj and remote_bytes differ).
+    mirror::Class* klass = obj->GetClass();
+    if (obj->IsClass()) {
+      os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
+    } else {
+      os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+    }
+
+    std::unordered_set<ArtField*> dirty_instance_fields;  // Sets: each field is listed once.
+    std::unordered_set<ArtField*> dirty_static_fields;
+    const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);  // Byte view of obj.
+    mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(remote_bytes);
+    for (size_t i = 0, count = obj->SizeOf(); i < count; ++i) {  // i is a byte offset in obj.
+      if (obj_bytes[i] != remote_bytes[i]) {
+        ArtField* field = ArtField::FindInstanceFieldWithOffset</*exact*/false>(klass, i);
+        if (field != nullptr) {
+          dirty_instance_fields.insert(field);
+        } else if (obj->IsClass()) {  // No instance field matched; try the class's static fields.
+          field = ArtField::FindStaticFieldWithOffset</*exact*/false>(obj->AsClass(), i);
+          if (field != nullptr) {
+            dirty_static_fields.insert(field);
+          }
+        }
+        if (field == nullptr) {  // Neither lookup matched this offset.
+          if (klass->IsArrayClass()) {
+            mirror::Class* component_type = klass->GetComponentType();
+            Primitive::Type primitive_type = component_type->GetPrimitiveType();
+            size_t component_size = Primitive::ComponentSize(primitive_type);
+            size_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+            if (i >= data_offset) {  // Byte is at or past the array data offset.
+              os << tabs << "Dirty array element " << (i - data_offset) / component_size << "\n";
+              // Skip to next element to prevent spam.
+              i += component_size - 1;
+              continue;
+            }
+          }
+          os << tabs << "No field for byte offset " << i << "\n";
+        }
+      }
+    }
+    // Dump the fields collected above. TODO: Dump field contents.
+    if (!dirty_instance_fields.empty()) {
+      os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
+      for (ArtField* field : dirty_instance_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    if (!dirty_static_fields.empty()) {
+      os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
+      for (ArtField* field : dirty_static_fields) {
+        os << tabs << PrettyField(field)
+           << " original=" << PrettyFieldValue(field, obj)
+           << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
+      }
+    }
+    os << "\n";  // Blank line terminates this object's report.
+  }
+
   // Look at /proc/$pid/mem and only diff the things from there
-  bool DumpImageDiffMap(pid_t image_diff_pid, const backtrace_map_t& boot_map)
+  bool DumpImageDiffMap(pid_t image_diff_pid,
+                        pid_t zygote_diff_pid,
+                        const backtrace_map_t& boot_map)
     SHARED_REQUIRES(Locks::mutator_lock_) {
     std::ostream& os = *os_;
     const size_t pointer_size = InstructionSetPointerSize(
@@ -272,6 +341,20 @@
       return false;
     }
 
+    std::vector<uint8_t> zygote_contents;
+    std::unique_ptr<File> zygote_map_file;
+    if (zygote_diff_pid != -1) {
+      std::string zygote_file_name =
+          StringPrintf("/proc/%ld/mem", static_cast<long>(zygote_diff_pid));  // NOLINT [runtime/int]
+      zygote_map_file.reset(OS::OpenFileForReading(zygote_file_name.c_str()));
+      zygote_contents.resize(boot_map_size);  // The boot map should be at the same address.
+      if (zygote_map_file == nullptr ||  // Open may fail (e.g. bad pid); avoid a null deref.
+          !zygote_map_file->PreadFully(&zygote_contents[0], boot_map_size, boot_map.start)) {
+        LOG(WARNING) << "Could not fully read zygote file " << zygote_file_name;
+        zygote_contents.clear();
+      }
+    }
+
     std::string page_map_file_name = StringPrintf(
         "/proc/%ld/pagemap", static_cast<long>(image_diff_pid));  // NOLINT [runtime/int]
     auto page_map_file = std::unique_ptr<File>(OS::OpenFileForReading(page_map_file_name.c_str()));
@@ -416,8 +499,11 @@
     // Look up local classes by their descriptor
     std::map<std::string, mirror::Class*> local_class_map;
 
-    // Use set to have sorted output.
-    std::set<mirror::Object*> dirty_objects;
+    // Objects that are dirty against the image (possibly shared or private dirty).
+    std::set<mirror::Object*> image_dirty_objects;
+
+    // Objects that are dirty against the zygote (probably private dirty).
+    std::set<mirror::Object*> zygote_dirty_objects;
 
     size_t dirty_object_bytes = 0;
     const uint8_t* begin_image_ptr = image_begin_unaligned;
@@ -454,17 +540,29 @@
 
       mirror::Class* klass = obj->GetClass();
 
-      bool different_object = false;
-
       // Check against the other object and see if they are different
       ptrdiff_t offset = current - begin_image_ptr;
       const uint8_t* current_remote = &remote_contents[offset];
       mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(
           const_cast<uint8_t*>(current_remote));
-      if (memcmp(current, current_remote, obj->SizeOf()) != 0) {
+
+      bool different_image_object = memcmp(current, current_remote, obj->SizeOf()) != 0;
+      if (different_image_object) {
+        bool different_zygote_object = false;
+        if (!zygote_contents.empty()) {
+          const uint8_t* zygote_ptr = &zygote_contents[offset];
+          different_zygote_object = memcmp(current, zygote_ptr, obj->SizeOf()) != 0;
+        }
+        if (different_zygote_object) {
+          // Different from zygote.
+          zygote_dirty_objects.insert(obj);
+        } else {
+          // Just different from image.
+          image_dirty_objects.insert(obj);
+        }
+
         different_objects++;
         dirty_object_bytes += obj->SizeOf();
-        dirty_objects.insert(obj);
 
         ++class_data[klass].dirty_object_count;
 
@@ -477,16 +575,13 @@
         }
         class_data[klass].dirty_object_byte_count += dirty_byte_count_per_object;
         class_data[klass].dirty_object_size_in_bytes += obj->SizeOf();
-
-        different_object = true;
-
         class_data[klass].dirty_objects.push_back(remote_obj);
       } else {
         ++class_data[klass].clean_object_count;
       }
 
       std::string descriptor = GetClassDescriptor(klass);
-      if (different_object) {
+      if (different_image_object) {
         if (klass->IsClassClass()) {
           // this is a "Class"
           mirror::Class* obj_as_class  = reinterpret_cast<mirror::Class*>(remote_obj);
@@ -558,69 +653,23 @@
     auto clean_object_class_values = SortByValueDesc<mirror::Class*, int, ClassData>(
         class_data, [](const ClassData& d) { return d.clean_object_count; });
 
-    os << "\n" << "  Dirty objects: " << dirty_objects.size() << "\n";
-    for (mirror::Object* obj : dirty_objects) {
-      const char* tabs = "    ";
-      // Attempt to find fields for all dirty bytes.
-      mirror::Class* klass = obj->GetClass();
-      if (obj->IsClass()) {
-        os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
-      } else {
-        os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+    if (!zygote_dirty_objects.empty()) {
+      os << "\n" << "  Dirty objects compared to zygote (probably private dirty): "
+         << zygote_dirty_objects.size() << "\n";
+      for (mirror::Object* obj : zygote_dirty_objects) {
+        const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
+        ptrdiff_t offset = obj_bytes - begin_image_ptr;
+        uint8_t* remote_bytes = &zygote_contents[offset];
+        DiffObjectContents(obj, remote_bytes, os);
       }
-
-      std::unordered_set<ArtField*> dirty_instance_fields;
-      std::unordered_set<ArtField*> dirty_static_fields;
+    }
+    os << "\n" << "  Dirty objects compared to image (private or shared dirty): "
+       << image_dirty_objects.size() << "\n";
+    for (mirror::Object* obj : image_dirty_objects) {
       const uint8_t* obj_bytes = reinterpret_cast<const uint8_t*>(obj);
       ptrdiff_t offset = obj_bytes - begin_image_ptr;
       uint8_t* remote_bytes = &remote_contents[offset];
-      mirror::Object* remote_obj = reinterpret_cast<mirror::Object*>(remote_bytes);
-      for (size_t i = 0, count = obj->SizeOf(); i < count; ++i) {
-        if (obj_bytes[i] != remote_bytes[i]) {
-          ArtField* field = ArtField::FindInstanceFieldWithOffset</*exact*/false>(klass, i);
-          if (field != nullptr) {
-            dirty_instance_fields.insert(field);
-          } else if (obj->IsClass()) {
-            field = ArtField::FindStaticFieldWithOffset</*exact*/false>(obj->AsClass(), i);
-            if (field != nullptr) {
-              dirty_static_fields.insert(field);
-            }
-          }
-          if (field == nullptr) {
-            if (klass->IsArrayClass()) {
-              mirror::Class* component_type = klass->GetComponentType();
-              Primitive::Type primitive_type = component_type->GetPrimitiveType();
-              size_t component_size = Primitive::ComponentSize(primitive_type);
-              size_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
-              if (i >= data_offset) {
-                os << tabs << "Dirty array element " << (i - data_offset) / component_size << "\n";
-                // Skip to next element to prevent spam.
-                i += component_size - 1;
-                continue;
-              }
-            }
-            os << tabs << "No field for byte offset " << i << "\n";
-          }
-        }
-      }
-      // Dump different fields. TODO: Dump field contents.
-      if (!dirty_instance_fields.empty()) {
-        os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
-        for (ArtField* field : dirty_instance_fields) {
-          os << tabs << PrettyField(field)
-             << " original=" << PrettyFieldValue(field, obj)
-             << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
-        }
-      }
-      if (!dirty_static_fields.empty()) {
-        os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
-        for (ArtField* field : dirty_static_fields) {
-          os << tabs << PrettyField(field)
-             << " original=" << PrettyFieldValue(field, obj)
-             << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
-        }
-      }
-      os << "\n";
+      DiffObjectContents(obj, remote_bytes, os);
     }
 
     os << "\n" << "  Dirty object count by class:\n";
@@ -959,11 +1008,15 @@
   const ImageHeader& image_header_;
   const std::string image_location_;
   pid_t image_diff_pid_;  // Dump image diff against boot.art if pid is non-negative
+  pid_t zygote_diff_pid_;  // Dump image diff against zygote boot.art if pid is non-negative
 
   DISALLOW_COPY_AND_ASSIGN(ImgDiagDumper);
 };
 
-static int DumpImage(Runtime* runtime, std::ostream* os, pid_t image_diff_pid) {
+static int DumpImage(Runtime* runtime,
+                     std::ostream* os,
+                     pid_t image_diff_pid,
+                     pid_t zygote_diff_pid) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
   std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
@@ -975,8 +1028,11 @@
       return EXIT_FAILURE;
     }
 
-    ImgDiagDumper img_diag_dumper(
-        os, image_header, image_space->GetImageLocation(), image_diff_pid);
+    ImgDiagDumper img_diag_dumper(os,
+                                  image_header,
+                                  image_space->GetImageLocation(),
+                                  image_diff_pid,
+                                  zygote_diff_pid);
     if (!img_diag_dumper.Dump()) {
       return EXIT_FAILURE;
     }
@@ -1004,6 +1060,13 @@
         *error_msg = "Image diff pid out of range";
         return kParseError;
       }
+    } else if (option.starts_with("--zygote-diff-pid=")) {
+      const char* zygote_diff_pid = option.substr(strlen("--zygote-diff-pid=")).data();
+
+      if (!ParseInt(zygote_diff_pid, &zygote_diff_pid_)) {
+        *error_msg = "Zygote diff pid out of range";
+        return kParseError;
+      }
     } else {
       return kParseUnknownArgument;
     }
@@ -1053,6 +1116,9 @@
     usage +=  // Optional.
         "  --image-diff-pid=<pid>: provide the PID of a process whose boot.art you want to diff.\n"
         "      Example: --image-diff-pid=$(pid zygote)\n"
+        "  --zygote-diff-pid=<pid>: provide the PID of the zygote whose boot.art you want to diff "
+        "against.\n"
+        "      Example: --zygote-diff-pid=$(pid zygote)\n"
         "\n";
 
     return usage;
@@ -1060,6 +1126,7 @@
 
  public:
   pid_t image_diff_pid_ = -1;
+  pid_t zygote_diff_pid_ = -1;
 };
 
 struct ImgDiagMain : public CmdlineMain<ImgDiagArgs> {
@@ -1068,7 +1135,8 @@
 
     return DumpImage(runtime,
                      args_->os_,
-                     args_->image_diff_pid_) == EXIT_SUCCESS;
+                     args_->image_diff_pid_,
+                     args_->zygote_diff_pid_) == EXIT_SUCCESS;
   }
 };
 
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index dc101e5..9f771ba 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,8 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+static const char* kImgDiagZygoteDiffPid = "--zygote-diff-pid";
+
 // from kernel <include/linux/threads.h>
 #define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
 
@@ -90,17 +92,25 @@
 
     // Run imgdiag --image-diff-pid=$image_diff_pid and wait until it's done with a 0 exit code.
     std::string diff_pid_args;
+    std::string zygote_diff_pid_args;
     {
       std::stringstream diff_pid_args_ss;
       diff_pid_args_ss << kImgDiagDiffPid << "=" << image_diff_pid;
       diff_pid_args = diff_pid_args_ss.str();
     }
-    std::string boot_image_args;
     {
-      boot_image_args = boot_image_args + kImgDiagBootImage + "=" + boot_image;
+      std::stringstream zygote_pid_args_ss;
+      zygote_pid_args_ss << kImgDiagZygoteDiffPid << "=" << image_diff_pid;
+      zygote_diff_pid_args = zygote_pid_args_ss.str();
     }
+    std::string boot_image_args = std::string(kImgDiagBootImage) + "=" + boot_image;
 
-    std::vector<std::string> exec_argv = { file_path, diff_pid_args, boot_image_args };
+    std::vector<std::string> exec_argv = {
+        file_path,
+        diff_pid_args,
+        zygote_diff_pid_args,
+        boot_image_args
+    };
 
     return ::art::Exec(exec_argv, error_msg);
   }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c859079..aa12c83 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -106,7 +106,6 @@
   jit/debugger_interface.cc \
   jit/jit.cc \
   jit/jit_code_cache.cc \
-  jit/jit_instrumentation.cc \
   jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   jit/profile_saver.cc  \
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index da7db1d..e6ff0aa 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1832,7 +1832,7 @@
     add   sp, #4
     .cfi_adjust_cfa_offset -4
     pop   {pc}
-END art_quick_fmod
+END art_quick_fmodf
 
     /* int64_t art_d2l(double d) */
     .extern art_d2l
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index d27d2f6..21725d3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -20,7 +20,7 @@
 #if defined(__cplusplus)
 #include "art_method.h"
 #include "gc/allocator/rosalloc.h"
-#include "jit/jit_instrumentation.h"
+#include "jit/jit.h"
 #include "lock_word.h"
 #include "mirror/class.h"
 #include "mirror/string.h"
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 70ff60f..d951089 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -77,9 +77,11 @@
   "RegAllocVldt ",
   "StackMapStm  ",
   "CodeGen      ",
+  "Assembler    ",
   "ParallelMove ",
   "GraphChecker ",
   "Verifier     ",
+  "CallingConv  ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 697f7e0..52a1002 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -88,9 +88,11 @@
   kArenaAllocRegisterAllocatorValidate,
   kArenaAllocStackMapStream,
   kArenaAllocCodeGenerator,
+  kArenaAllocAssembler,
   kArenaAllocParallelMoveResolver,
   kArenaAllocGraphChecker,
   kArenaAllocVerifier,
+  kArenaAllocCallingConvention,
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 3a28422..ce7f62a 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -57,6 +57,7 @@
 // TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
+  static const uint32_t kDefaultMethodsVersion = 37;
   static const uint8_t kDexMagic[];
   static constexpr size_t kNumDexVersions = 2;
   static constexpr size_t kDexVersionLen = 4;
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 681c5f9..3df4e98 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -2465,7 +2465,7 @@
                                 GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
                                 field_access_flags,
                                 PrettyJavaAccessFlags(field_access_flags).c_str());
-      if (header_->GetVersion() >= 37) {
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
         return false;
       } else {
         // Allow in older versions, but warn.
@@ -2480,7 +2480,7 @@
                                 GetFieldDescriptionOrError(begin_, header_, idx).c_str(),
                                 field_access_flags,
                                 PrettyJavaAccessFlags(field_access_flags).c_str());
-      if (header_->GetVersion() >= 37) {
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
         return false;
       } else {
         // Allow in older versions, but warn.
@@ -2628,12 +2628,16 @@
 
   // Interfaces are special.
   if ((class_access_flags & kAccInterface) != 0) {
-    // Non-static interface methods must be public.
-    if ((method_access_flags & (kAccPublic | kAccStatic)) == 0) {
+    // Non-static interface methods must be public or private.
+    uint32_t desired_flags = (kAccPublic | kAccStatic);
+    if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+      desired_flags |= kAccPrivate;
+    }
+    if ((method_access_flags & desired_flags) == 0) {
       *error_msg = StringPrintf("Interface virtual method %" PRIu32 "(%s) is not public",
           method_index,
           GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
-      if (header_->GetVersion() >= 37) {
+      if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
         return false;
       } else {
         // Allow in older versions, but warn.
@@ -2686,7 +2690,7 @@
         *error_msg = StringPrintf("Interface method %" PRIu32 "(%s) is not public and abstract",
             method_index,
             GetMethodDescriptionOrError(begin_, header_, method_index).c_str());
-        if (header_->GetVersion() >= 37) {
+        if (header_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
           return false;
         } else {
           // Allow in older versions, but warn.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index baf4afe..a432782 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -285,16 +285,19 @@
     }
 
     jit::Jit* jit = Runtime::Current()->GetJit();
-    if (jit != nullptr && jit->CanInvokeCompiledCode(method)) {
-      JValue result;
+    if (jit != nullptr) {
+      jit->MethodEntered(self, shadow_frame.GetMethod());
+      if (jit->CanInvokeCompiledCode(method)) {
+        JValue result;
 
-      // Pop the shadow frame before calling into compiled code.
-      self->PopShadowFrame();
-      ArtInterpreterToCompiledCodeBridge(self, code_item, &shadow_frame, &result);
-      // Push the shadow frame back as the caller will expect it.
-      self->PushShadowFrame(&shadow_frame);
+        // Pop the shadow frame before calling into compiled code.
+        self->PopShadowFrame();
+        ArtInterpreterToCompiledCodeBridge(self, code_item, &shadow_frame, &result);
+        // Push the shadow frame back as the caller will expect it.
+        self->PushShadowFrame(&shadow_frame);
 
-      return result;
+        return result;
+      }
     }
   }
 
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 19d971e..fb98175 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -34,6 +34,7 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/jit.h"
 #include "lambda/art_lambda_method.h"
 #include "lambda/box_table.h"
 #include "lambda/closure.h"
@@ -628,6 +629,15 @@
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      if (type == kVirtual || type == kInterface) {
+        jit->InvokeVirtualOrInterface(
+            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+      jit->AddSamples(self, sf_method, 1);
+    }
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (type == kVirtual || type == kInterface) {
       instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
       if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
@@ -667,7 +677,14 @@
     result->SetJ(0);
     return false;
   } else {
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      jit->InvokeVirtualOrInterface(
+          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+      jit->AddSamples(self, shadow_frame.GetMethod(), 1);
+    }
     instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+    // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
       instrumentation->InvokeVirtualOrInterface(
           self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index ce698fb..c95af6f 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -22,7 +22,6 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -67,7 +66,9 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                          \
   do {                                                                                          \
-    instrumentation->Branch(self, method, dex_pc, offset);                                      \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                      \
+      instrumentation->Branch(self, method, dex_pc, offset);                                    \
+    }                                                                                           \
     JValue result;                                                                              \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {           \
       return result;                                                                            \
@@ -76,8 +77,8 @@
 
 #define HOTNESS_UPDATE()                                                                       \
   do {                                                                                         \
-    if (jit_instrumentation_cache != nullptr) {                                                \
-      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1);                                                        \
     }                                                                                          \
   } while (false)
 
@@ -195,10 +196,6 @@
   const auto* const instrumentation = Runtime::Current()->GetInstrumentation();
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
-  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
-  if (jit != nullptr) {
-    jit_instrumentation_cache = jit->GetInstrumentationCache();
-  }
 
   // Jump to first instruction.
   ADVANCE(0);
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 442e191..ca1d635 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -18,7 +18,6 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "safe_math.h"
 
 #include <memory>  // std::unique_ptr
@@ -74,7 +73,9 @@
 
 #define BRANCH_INSTRUMENTATION(offset)                                                         \
   do {                                                                                         \
-    instrumentation->Branch(self, method, dex_pc, offset);                                     \
+    if (UNLIKELY(instrumentation->HasBranchListeners())) {                                     \
+      instrumentation->Branch(self, method, dex_pc, offset);                                   \
+    }                                                                                          \
     JValue result;                                                                             \
     if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {          \
       if (interpret_one_instruction) {                                                         \
@@ -87,8 +88,8 @@
 
 #define HOTNESS_UPDATE()                                                                       \
   do {                                                                                         \
-    if (jit_instrumentation_cache != nullptr) {                                                \
-      jit_instrumentation_cache->AddSamples(self, method, 1);                                  \
+    if (jit != nullptr) {                                                                      \
+      jit->AddSamples(self, method, 1);                                                        \
     }                                                                                          \
   } while (false)
 
@@ -115,10 +116,6 @@
   uint16_t inst_data;
   ArtMethod* method = shadow_frame.GetMethod();
   jit::Jit* jit = Runtime::Current()->GetJit();
-  jit::JitInstrumentationCache* jit_instrumentation_cache = nullptr;
-  if (jit != nullptr) {
-    jit_instrumentation_cache = jit->GetInstrumentationCache();
-  }
 
   // TODO: collapse capture-variable+create-lambda into one opcode, then we won't need
   // to keep this live for the scope of the entire function call.
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 32c45fc..f800683 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -20,8 +20,6 @@
 #include "interpreter/interpreter_common.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
-#include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "debugger.h"
 
 namespace art {
@@ -652,10 +650,9 @@
   int32_t countdown_value = jit::kJitHotnessDisabled;
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
-    jit::JitInstrumentationCache* cache = jit->GetInstrumentationCache();
-    int32_t warm_threshold = cache->WarmMethodThreshold();
-    int32_t hot_threshold = cache->HotMethodThreshold();
-    int32_t osr_threshold = cache->OSRMethodThreshold();
+    int32_t warm_threshold = jit->WarmMethodThreshold();
+    int32_t hot_threshold = jit->HotMethodThreshold();
+    int32_t osr_threshold = jit->OSRMethodThreshold();
     if (hotness_count < warm_threshold) {
       countdown_value = warm_threshold - hotness_count;
     } else if (hotness_count < hot_threshold) {
@@ -666,7 +663,7 @@
       countdown_value = jit::kJitCheckForOSR;
     }
     if (jit::Jit::ShouldUsePriorityThreadWeight()) {
-      int32_t priority_thread_weight = cache->PriorityThreadWeight();
+      int32_t priority_thread_weight = jit->PriorityThreadWeight();
       countdown_value = std::min(countdown_value, countdown_value / priority_thread_weight);
     }
   }
@@ -692,7 +689,7 @@
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
     int16_t count = shadow_frame->GetCachedHotnessCountdown() - shadow_frame->GetHotnessCountdown();
-    jit->GetInstrumentationCache()->AddSamples(self, method, count);
+    jit->AddSamples(self, method, count);
   }
   return MterpSetUpHotnessCountdown(method, shadow_frame);
 }
@@ -705,7 +702,7 @@
   uint32_t dex_pc = shadow_frame->GetDexPC();
   jit::Jit* jit = Runtime::Current()->GetJit();
   if ((jit != nullptr) && (offset <= 0)) {
-    jit->GetInstrumentationCache()->AddSamples(self, method, 1);
+    jit->AddSamples(self, method, 1);
   }
   int16_t countdown_value = MterpSetUpHotnessCountdown(method, shadow_frame);
   if (countdown_value == jit::kJitCheckForOSR) {
@@ -725,7 +722,7 @@
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (offset <= 0) {
     // Keep updating hotness in case a compilation request was dropped.  Eventually it will retry.
-    jit->GetInstrumentationCache()->AddSamples(self, method, 1);
+    jit->AddSamples(self, method, 1);
   }
   // Assumes caller has already determined that an OSR check is appropriate.
   return jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, result);
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index d9d7a19..7cdd7c5 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -70,15 +70,19 @@
 
 static Mutex g_jit_debug_mutex("JIT debug interface lock", kJitDebugInterfaceLock);
 
-static JITCodeEntry* CreateJITCodeEntryInternal(
-    std::unique_ptr<const uint8_t[]> symfile_addr,
-    uintptr_t symfile_size)
+static JITCodeEntry* CreateJITCodeEntryInternal(std::vector<uint8_t> symfile)
     REQUIRES(g_jit_debug_mutex) {
-  DCHECK(symfile_addr.get() != nullptr);
+  DCHECK_NE(symfile.size(), 0u);
+
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* symfile_copy = new uint8_t[symfile.size()];
+  CHECK(symfile_copy != nullptr);
+  memcpy(symfile_copy, symfile.data(), symfile.size());
 
   JITCodeEntry* entry = new JITCodeEntry;
-  entry->symfile_addr_ = symfile_addr.release();
-  entry->symfile_size_ = symfile_size;
+  CHECK(entry != nullptr);
+  entry->symfile_addr_ = symfile_copy;
+  entry->symfile_size_ = symfile.size();
   entry->prev_ = nullptr;
 
   entry->next_ = __jit_debug_descriptor.first_entry_;
@@ -111,11 +115,10 @@
   delete entry;
 }
 
-JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
-                                 uintptr_t symfile_size) {
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile) {
   Thread* self = Thread::Current();
   MutexLock mu(self, g_jit_debug_mutex);
-  return CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  return CreateJITCodeEntryInternal(std::move(symfile));
 }
 
 void DeleteJITCodeEntry(JITCodeEntry* entry) {
@@ -128,14 +131,12 @@
 // so that the user of the JIT interface does not have to store them.
 static std::unordered_map<uintptr_t, JITCodeEntry*> g_jit_code_entries;
 
-void CreateJITCodeEntryForAddress(uintptr_t address,
-                                  std::unique_ptr<const uint8_t[]> symfile_addr,
-                                  uintptr_t symfile_size) {
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile) {
   Thread* self = Thread::Current();
   MutexLock mu(self, g_jit_debug_mutex);
   DCHECK_NE(address, 0u);
   DCHECK(g_jit_code_entries.find(address) == g_jit_code_entries.end());
-  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile_addr), symfile_size);
+  JITCodeEntry* entry = CreateJITCodeEntryInternal(std::move(symfile));
   g_jit_code_entries.emplace(address, entry);
 }
 
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index 74469a9..d9bf331 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -19,6 +19,7 @@
 
 #include <inttypes.h>
 #include <memory>
+#include <vector>
 
 namespace art {
 
@@ -28,8 +29,7 @@
 
 // Notify native debugger about new JITed code by passing in-memory ELF.
 // It takes ownership of the in-memory ELF file.
-JITCodeEntry* CreateJITCodeEntry(std::unique_ptr<const uint8_t[]> symfile_addr,
-                                 uintptr_t symfile_size);
+JITCodeEntry* CreateJITCodeEntry(std::vector<uint8_t> symfile);
 
 // Notify native debugger that JITed code has been removed.
 // It also releases the associated in-memory ELF file.
@@ -38,9 +38,7 @@
 // Notify native debugger about new JITed code by passing in-memory ELF.
 // The address is used only to uniquely identify the entry.
 // It takes ownership of the in-memory ELF file.
-void CreateJITCodeEntryForAddress(uintptr_t address,
-                                  std::unique_ptr<const uint8_t[]> symfile_addr,
-                                  uintptr_t symfile_size);
+void CreateJITCodeEntryForAddress(uintptr_t address, std::vector<uint8_t> symfile);
 
 // Notify native debugger that JITed code has been removed.
 // Returns false if entry for the given address was not found.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 3344346..558e443 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -23,7 +23,6 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
-#include "jit_instrumentation.h"
 #include "oat_file_manager.h"
 #include "oat_quick_method_header.h"
 #include "offline_profiling_info.h"
@@ -31,12 +30,15 @@
 #include "runtime.h"
 #include "runtime_options.h"
 #include "stack_map.h"
+#include "thread_list.h"
 #include "utils.h"
 
 namespace art {
 namespace jit {
 
 static constexpr bool kEnableOnStackReplacement = true;
+// At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
+static constexpr int kJitPoolThreadPthreadPriority = 9;
 
 // JIT compiler
 void* Jit::jit_library_handle_= nullptr;
@@ -146,6 +148,17 @@
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
       << ", compile_threshold=" << options->GetCompileThreshold()
       << ", save_profiling_info=" << options->GetSaveProfilingInfo();
+
+
+  jit->hot_method_threshold_ = options->GetCompileThreshold();
+  jit->warm_method_threshold_ = options->GetWarmupThreshold();
+  jit->osr_method_threshold_ = options->GetOsrThreshold();
+  jit->priority_thread_weight_ = options->GetPriorityThreadWeight();
+
+  jit->CreateThreadPool();
+
+  // Notify native debugger about the classes already loaded before the creation of the jit.
+  jit->DumpTypeInfoForLoadedTypes(Runtime::Current()->GetClassLinker());
   return jit.release();
 }
 
@@ -233,13 +246,31 @@
 }
 
 void Jit::CreateThreadPool() {
-  CHECK(instrumentation_cache_.get() != nullptr);
-  instrumentation_cache_->CreateThreadPool();
+  // There is a DCHECK in the 'AddSamples' method to ensure the thread pool
+  // is not null when we instrument.
+  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
+  thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
+  thread_pool_->StartWorkers(Thread::Current());
 }
 
 void Jit::DeleteThreadPool() {
-  if (instrumentation_cache_.get() != nullptr) {
-    instrumentation_cache_->DeleteThreadPool(Thread::Current());
+  Thread* self = Thread::Current();
+  DCHECK(Runtime::Current()->IsShuttingDown(self));
+  if (thread_pool_ != nullptr) {
+    ThreadPool* cache = nullptr;
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      // Clear thread_pool_ field while the threads are suspended.
+      // A mutator in the 'AddSamples' method will check against it.
+      cache = thread_pool_.release();
+    }
+    cache->StopWorkers(self);
+    cache->RemoveAllTasks(self);
+    // We could just suspend all threads, but we know those threads
+    // will finish in a short period, so it's not worth adding suspend logic
+    // here. Besides, this is only done for shutdown.
+    cache->Wait(self, false, false);
+    delete cache;
   }
 }
 
@@ -259,10 +290,7 @@
 }
 
 bool Jit::JitAtFirstUse() {
-  if (instrumentation_cache_ != nullptr) {
-    return instrumentation_cache_->HotMethodThreshold() == 0;
-  }
-  return false;
+  return HotMethodThreshold() == 0;
 }
 
 bool Jit::CanInvokeCompiledCode(ArtMethod* method) {
@@ -285,17 +313,6 @@
   }
 }
 
-void Jit::CreateInstrumentationCache(size_t compile_threshold,
-                                     size_t warmup_threshold,
-                                     size_t osr_threshold,
-                                     uint16_t priority_thread_weight) {
-  instrumentation_cache_.reset(
-      new jit::JitInstrumentationCache(compile_threshold,
-                                       warmup_threshold,
-                                       osr_threshold,
-                                       priority_thread_weight));
-}
-
 void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr && jit->generate_debug_info_) {
@@ -480,5 +497,164 @@
   memory_use_.AddValue(bytes);
 }
 
+class JitCompileTask FINAL : public Task {
+ public:
+  enum TaskKind {
+    kAllocateProfile,
+    kCompile,
+    kCompileOsr
+  };
+
+  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
+    ScopedObjectAccess soa(Thread::Current());
+    // Add a global ref to the class to prevent class unloading until compilation is done.
+    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
+    CHECK(klass_ != nullptr);
+  }
+
+  ~JitCompileTask() {
+    ScopedObjectAccess soa(Thread::Current());
+    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
+  }
+
+  void Run(Thread* self) OVERRIDE {
+    ScopedObjectAccess soa(self);
+    if (kind_ == kCompile) {
+      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
+        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
+      }
+    } else if (kind_ == kCompileOsr) {
+      VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
+      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
+        VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
+      }
+    } else {
+      DCHECK(kind_ == kAllocateProfile);
+      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+      }
+    }
+  }
+
+  void Finalize() OVERRIDE {
+    delete this;
+  }
+
+ private:
+  ArtMethod* const method_;
+  const TaskKind kind_;
+  jobject klass_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
+};
+
+void Jit::AddSamples(Thread* self, ArtMethod* method, uint16_t count) {
+  if (thread_pool_ == nullptr) {
+    // Should only see this when shutting down.
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+
+  if (method->IsClassInitializer() || method->IsNative()) {
+    // We do not want to compile such methods.
+    return;
+  }
+  DCHECK(thread_pool_ != nullptr);
+  DCHECK_GT(warm_method_threshold_, 0);
+  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
+  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
+  DCHECK_GE(priority_thread_weight_, 1);
+  DCHECK_LE(priority_thread_weight_, hot_method_threshold_);
+
+  int32_t starting_count = method->GetCounter();
+  if (Jit::ShouldUsePriorityThreadWeight()) {
+    count *= priority_thread_weight_;
+  }
+  int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
+  if (starting_count < warm_method_threshold_) {
+    if (new_count >= warm_method_threshold_) {
+      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+      if (success) {
+        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+      }
+
+      if (thread_pool_ == nullptr) {
+        // Calling ProfilingInfo::Create might put us in a suspended state, which could
+        // lead to the thread pool being deleted when we are shutting down.
+        DCHECK(Runtime::Current()->IsShuttingDown(self));
+        return;
+      }
+
+      if (!success) {
+        // We failed allocating. Instead of doing the collection on the Java thread, we push
+        // an allocation to a compiler thread, that will do the collection.
+        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
+      }
+    }
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, hot_method_threshold_ - 1);
+  } else if (starting_count < hot_method_threshold_) {
+    if (new_count >= hot_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
+    }
+    // Avoid jumping more than one state at a time.
+    new_count = std::min(new_count, osr_method_threshold_ - 1);
+  } else if (starting_count < osr_method_threshold_) {
+    if (new_count >= osr_method_threshold_) {
+      DCHECK(thread_pool_ != nullptr);
+      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+    }
+  }
+  // Update hotness counter
+  method->SetCounter(new_count);
+}
+
+void Jit::MethodEntered(Thread* thread, ArtMethod* method) {
+  if (UNLIKELY(Runtime::Current()->GetJit()->JitAtFirstUse())) {
+    // The compiler requires a ProfilingInfo object.
+    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
+    JitCompileTask compile_task(method, JitCompileTask::kCompile);
+    compile_task.Run(thread);
+    return;
+  }
+
+  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
+  // Update the entrypoint if the ProfilingInfo has one. The interpreter will call it
+  // instead of interpreting the method.
+  // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
+  // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
+  if ((profiling_info != nullptr) &&
+      (profiling_info->GetSavedEntryPoint() != nullptr) &&
+      !Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()) {
+    method->SetEntryPointFromQuickCompiledCode(profiling_info->GetSavedEntryPoint());
+  } else {
+    AddSamples(thread, method, 1);
+  }
+}
+
+void Jit::InvokeVirtualOrInterface(Thread* thread,
+                                   mirror::Object* this_object,
+                                   ArtMethod* caller,
+                                   uint32_t dex_pc,
+                                   ArtMethod* callee ATTRIBUTE_UNUSED) {
+  ScopedAssertNoThreadSuspension ants(thread, __FUNCTION__);
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
+  if (info != nullptr) {
+    // Since the instrumentation is marked from the declaring class we need to mark the card so
+    // that mod-union tables and card rescanning know about the update.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
+    info->AddInvokeInfo(dex_pc, this_object->GetClass());
+  }
+}
+
+void Jit::WaitForCompilationToFinish(Thread* self) {
+  if (thread_pool_ != nullptr) {
+    thread_pool_->Wait(self, false, false);
+  }
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e212366..96f9608 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -34,9 +34,11 @@
 namespace jit {
 
 class JitCodeCache;
-class JitInstrumentationCache;
 class JitOptions;
 
+static constexpr int16_t kJitCheckForOSR = -1;
+static constexpr int16_t kJitHotnessDisabled = -2;
+
 class Jit {
  public:
   static constexpr bool kStressMode = kIsDebugBuild;
@@ -46,17 +48,16 @@
   static Jit* Create(JitOptions* options, std::string* error_msg);
   bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateInstrumentationCache(size_t compile_threshold,
-                                  size_t warmup_threshold,
-                                  size_t osr_threshold,
-                                  uint16_t priority_thread_weight);
   void CreateThreadPool();
+
   const JitCodeCache* GetCodeCache() const {
     return code_cache_.get();
   }
+
   JitCodeCache* GetCodeCache() {
     return code_cache_.get();
   }
+
   void DeleteThreadPool();
   // Dump interesting info: #methods compiled, code vs data size, compile / verify cumulative
   // loggers.
@@ -68,10 +69,39 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  JitInstrumentationCache* GetInstrumentationCache() const {
-    return instrumentation_cache_.get();
+  size_t OSRMethodThreshold() const {
+    return osr_method_threshold_;
   }
 
+  size_t HotMethodThreshold() const {
+    return hot_method_threshold_;
+  }
+
+  size_t WarmMethodThreshold() const {
+    return warm_method_threshold_;
+  }
+
+  uint16_t PriorityThreadWeight() const {
+    return priority_thread_weight_;
+  }
+
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
+
+  // Profiling methods.
+  void MethodEntered(Thread* thread, ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Starts the profile saver if the config options allow profile recording.
   // The profile will be stored in the specified `filename` and will contain
   // information collected from the given `code_paths` (a set of dex locations).
@@ -137,11 +167,15 @@
   Histogram<uint64_t> memory_use_ GUARDED_BY(lock_);
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
-  std::unique_ptr<jit::JitInstrumentationCache> instrumentation_cache_;
   std::unique_ptr<jit::JitCodeCache> code_cache_;
 
   bool save_profiling_info_;
   static bool generate_debug_info_;
+  uint16_t hot_method_threshold_;
+  uint16_t warm_method_threshold_;
+  uint16_t osr_method_threshold_;
+  uint16_t priority_thread_weight_;
+  std::unique_ptr<ThreadPool> thread_pool_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
deleted file mode 100644
index b2c0c20..0000000
--- a/runtime/jit/jit_instrumentation.cc
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "jit_instrumentation.h"
-
-#include "art_method-inl.h"
-#include "jit.h"
-#include "jit_code_cache.h"
-#include "scoped_thread_state_change.h"
-#include "thread_list.h"
-
-namespace art {
-namespace jit {
-
-// At what priority to schedule jit threads. 9 is the lowest foreground priority on device.
-static constexpr int kJitPoolThreadPthreadPriority = 9;
-
-class JitCompileTask FINAL : public Task {
- public:
-  enum TaskKind {
-    kAllocateProfile,
-    kCompile,
-    kCompileOsr
-  };
-
-  JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
-    ScopedObjectAccess soa(Thread::Current());
-    // Add a global ref to the class to prevent class unloading until compilation is done.
-    klass_ = soa.Vm()->AddGlobalRef(soa.Self(), method_->GetDeclaringClass());
-    CHECK(klass_ != nullptr);
-  }
-
-  ~JitCompileTask() {
-    ScopedObjectAccess soa(Thread::Current());
-    soa.Vm()->DeleteGlobalRef(soa.Self(), klass_);
-  }
-
-  void Run(Thread* self) OVERRIDE {
-    ScopedObjectAccess soa(self);
-    if (kind_ == kCompile) {
-      VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
-        VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
-      }
-    } else if (kind_ == kCompileOsr) {
-      VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
-      if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
-        VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
-      }
-    } else {
-      DCHECK(kind_ == kAllocateProfile);
-      if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
-      }
-    }
-  }
-
-  void Finalize() OVERRIDE {
-    delete this;
-  }
-
- private:
-  ArtMethod* const method_;
-  const TaskKind kind_;
-  jobject klass_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
-};
-
-JitInstrumentationCache::JitInstrumentationCache(uint16_t hot_method_threshold,
-                                                 uint16_t warm_method_threshold,
-                                                 uint16_t osr_method_threshold,
-                                                 uint16_t priority_thread_weight)
-    : hot_method_threshold_(hot_method_threshold),
-      warm_method_threshold_(warm_method_threshold),
-      osr_method_threshold_(osr_method_threshold),
-      priority_thread_weight_(priority_thread_weight),
-      listener_(this) {
-}
-
-void JitInstrumentationCache::CreateThreadPool() {
-  // Create the thread pool before setting the instrumentation, so that
-  // when the threads stopped being suspended, they can use it directly.
-  // There is a DCHECK in the 'AddSamples' method to ensure the tread pool
-  // is not null when we instrument.
-  thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
-  thread_pool_->SetPthreadPriority(kJitPoolThreadPthreadPriority);
-  thread_pool_->StartWorkers(Thread::Current());
-  {
-    // Add Jit interpreter instrumentation, tells the interpreter when
-    // to notify the jit to compile something.
-    ScopedSuspendAll ssa(__FUNCTION__);
-    Runtime::Current()->GetInstrumentation()->AddListener(
-        &listener_, JitInstrumentationListener::kJitEvents);
-  }
-}
-
-void JitInstrumentationCache::DeleteThreadPool(Thread* self) {
-  DCHECK(Runtime::Current()->IsShuttingDown(self));
-  if (thread_pool_ != nullptr) {
-    // First remove the listener, to avoid having mutators enter
-    // 'AddSamples'.
-    ThreadPool* cache = nullptr;
-    {
-      ScopedSuspendAll ssa(__FUNCTION__);
-      Runtime::Current()->GetInstrumentation()->RemoveListener(
-          &listener_, JitInstrumentationListener::kJitEvents);
-      // Clear thread_pool_ field while the threads are suspended.
-      // A mutator in the 'AddSamples' method will check against it.
-      cache = thread_pool_.release();
-    }
-    cache->StopWorkers(self);
-    cache->RemoveAllTasks(self);
-    // We could just suspend all threads, but we know those threads
-    // will finish in a short period, so it's not worth adding a suspend logic
-    // here. Besides, this is only done for shutdown.
-    cache->Wait(self, false, false);
-    delete cache;
-  }
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, uint16_t count) {
-  // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
-  // than we want resulting in samples even after the method is compiled.  Also, if the
-  // jit is no longer interested in hotness samples because we're shutting down, just return.
-  if (method->IsClassInitializer() || method->IsNative() || (thread_pool_ == nullptr)) {
-    if (thread_pool_ == nullptr) {
-      // Should only see this when shutting down.
-      DCHECK(Runtime::Current()->IsShuttingDown(self));
-    }
-    return;
-  }
-  DCHECK(thread_pool_ != nullptr);
-  DCHECK_GT(warm_method_threshold_, 0);
-  DCHECK_GT(hot_method_threshold_, warm_method_threshold_);
-  DCHECK_GT(osr_method_threshold_, hot_method_threshold_);
-  DCHECK_GE(priority_thread_weight_, 1);
-  DCHECK_LE(priority_thread_weight_, hot_method_threshold_);
-
-  int32_t starting_count = method->GetCounter();
-  if (Jit::ShouldUsePriorityThreadWeight()) {
-    count *= priority_thread_weight_;
-  }
-  int32_t new_count = starting_count + count;   // int32 here to avoid wrap-around;
-  if (starting_count < warm_method_threshold_) {
-    if (new_count >= warm_method_threshold_) {
-      bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
-      if (success) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method);
-      }
-
-      if (thread_pool_ == nullptr) {
-        // Calling ProfilingInfo::Create might put us in a suspended state, which could
-        // lead to the thread pool being deleted when we are shutting down.
-        DCHECK(Runtime::Current()->IsShuttingDown(self));
-        return;
-      }
-
-      if (!success) {
-        // We failed allocating. Instead of doing the collection on the Java thread, we push
-        // an allocation to a compiler thread, that will do the collection.
-        thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
-      }
-    }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, hot_method_threshold_ - 1);
-  } else if (starting_count < hot_method_threshold_) {
-    if (new_count >= hot_method_threshold_) {
-      DCHECK(thread_pool_ != nullptr);
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-    }
-    // Avoid jumping more than one state at a time.
-    new_count = std::min(new_count, osr_method_threshold_ - 1);
-  } else if (starting_count < osr_method_threshold_) {
-    if (new_count >= osr_method_threshold_) {
-      DCHECK(thread_pool_ != nullptr);
-      thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
-    }
-  }
-  // Update hotness counter
-  method->SetCounter(new_count);
-}
-
-JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
-    : instrumentation_cache_(cache) {
-  CHECK(instrumentation_cache_ != nullptr);
-}
-
-void JitInstrumentationListener::MethodEntered(Thread* thread,
-                                               mirror::Object* /*this_object*/,
-                                               ArtMethod* method,
-                                               uint32_t /*dex_pc*/) {
-  if (UNLIKELY(Runtime::Current()->GetJit()->JitAtFirstUse())) {
-    // The compiler requires a ProfilingInfo object.
-    ProfilingInfo::Create(thread, method, /* retry_allocation */ true);
-    JitCompileTask compile_task(method, JitCompileTask::kCompile);
-    compile_task.Run(thread);
-    return;
-  }
-
-  ProfilingInfo* profiling_info = method->GetProfilingInfo(sizeof(void*));
-  // Update the entrypoint if the ProfilingInfo has one. The interpreter will call it
-  // instead of interpreting the method.
-  // We avoid doing this if exit stubs are installed to not mess with the instrumentation.
-  // TODO(ngeoffray): Clean up instrumentation and code cache interactions.
-  if ((profiling_info != nullptr) &&
-      (profiling_info->GetSavedEntryPoint() != nullptr) &&
-      !Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled()) {
-    method->SetEntryPointFromQuickCompiledCode(profiling_info->GetSavedEntryPoint());
-  } else {
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-}
-
-void JitInstrumentationListener::Branch(Thread* thread,
-                                        ArtMethod* method,
-                                        uint32_t dex_pc ATTRIBUTE_UNUSED,
-                                        int32_t dex_pc_offset) {
-  if (dex_pc_offset < 0) {
-    // Increment method hotness if it is a backward branch.
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
-}
-
-void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
-                                                          mirror::Object* this_object,
-                                                          ArtMethod* caller,
-                                                          uint32_t dex_pc,
-                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
-  // We make sure we cannot be suspended, as the profiling info can be concurrently deleted.
-  instrumentation_cache_->AddSamples(thread, caller, 1);
-  DCHECK(this_object != nullptr);
-  ProfilingInfo* info = caller->GetProfilingInfo(sizeof(void*));
-  if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
-    info->AddInvokeInfo(dex_pc, this_object->GetClass());
-  }
-}
-
-void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
-  if (thread_pool_ != nullptr) {
-    thread_pool_->Wait(self, false, false);
-  }
-}
-
-}  // namespace jit
-}  // namespace art
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
deleted file mode 100644
index d0545f8..0000000
--- a/runtime/jit/jit_instrumentation.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Copyright 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-#define ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
-
-#include <unordered_map>
-
-#include "instrumentation.h"
-
-#include "atomic.h"
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "gc_root.h"
-#include "jni.h"
-#include "object_callbacks.h"
-#include "thread_pool.h"
-
-namespace art {
-namespace mirror {
-  class Object;
-  class Throwable;
-}  // namespace mirror
-class ArtField;
-class ArtMethod;
-union JValue;
-class Thread;
-
-namespace jit {
-static constexpr int16_t kJitCheckForOSR = -1;
-static constexpr int16_t kJitHotnessDisabled = -2;
-
-class JitInstrumentationCache;
-
-class JitInstrumentationListener : public instrumentation::InstrumentationListener {
- public:
-  explicit JitInstrumentationListener(JitInstrumentationCache* cache);
-
-  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                     ArtMethod* method, uint32_t /*dex_pc*/)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    const JValue& /*return_value*/)
-      OVERRIDE { }
-  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                 ArtField* /*field*/) OVERRIDE { }
-  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                    ArtField* /*field*/, const JValue& /*field_value*/)
-      OVERRIDE { }
-  void ExceptionCaught(Thread* /*thread*/,
-                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
-
-  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
-
-  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
-
-  void InvokeVirtualOrInterface(Thread* thread,
-                                mirror::Object* this_object,
-                                ArtMethod* caller,
-                                uint32_t dex_pc,
-                                ArtMethod* callee)
-      OVERRIDE
-      REQUIRES(Roles::uninterruptible_)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  static constexpr uint32_t kJitEvents =
-      instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kInvokeVirtualOrInterface;
-
- private:
-  JitInstrumentationCache* const instrumentation_cache_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
-};
-
-// Keeps track of which methods are hot.
-class JitInstrumentationCache {
- public:
-  JitInstrumentationCache(uint16_t hot_method_threshold,
-                          uint16_t warm_method_threshold,
-                          uint16_t osr_method_threshold,
-                          uint16_t priority_thread_weight);
-  void AddSamples(Thread* self, ArtMethod* method, uint16_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateThreadPool();
-  void DeleteThreadPool(Thread* self);
-
-  size_t OSRMethodThreshold() const {
-    return osr_method_threshold_;
-  }
-
-  size_t HotMethodThreshold() const {
-    return hot_method_threshold_;
-  }
-
-  size_t WarmMethodThreshold() const {
-    return warm_method_threshold_;
-  }
-
-  size_t PriorityThreadWeight() const {
-    return priority_thread_weight_;
-  }
-
-  // Wait until there is no more pending compilation tasks.
-  void WaitForCompilationToFinish(Thread* self);
-
- private:
-  uint16_t hot_method_threshold_;
-  uint16_t warm_method_threshold_;
-  uint16_t osr_method_threshold_;
-  uint16_t priority_thread_weight_;
-  JitInstrumentationListener listener_;
-  std::unique_ptr<ThreadPool> thread_pool_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
-};
-
-}  // namespace jit
-}  // namespace art
-
-#endif  // ART_RUNTIME_JIT_JIT_INSTRUMENTATION_H_
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 78e372a..3f95772 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -492,10 +492,21 @@
     const ImageInfo* image_info = GetImageInfo();
     if (image_info == nullptr) {
       VLOG(oat) << "No image for oat image checksum to match against.";
-      return true;
-    }
 
-    if (file.GetOatHeader().GetImageFileLocationOatChecksum() != GetCombinedImageChecksum()) {
+      if (HasOriginalDexFiles()) {
+        return true;
+      }
+
+      // If there is no original dex file to fall back to, grudgingly accept
+      // the oat file. This could technically lead to crashes, but there's no
+      // way we could find a better oat file to use for this dex location,
+      // and it's better than being stuck in a boot loop with no way out.
+      // The problem will hopefully resolve itself the next time the runtime
+      // starts up.
+      LOG(WARNING) << "Dex location " << dex_location_ << " does not seem to include dex file. "
+        << "Allow oat file use. This is potentially dangerous.";
+    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
+        != GetCombinedImageChecksum()) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
       return true;
     }
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 94f6345..3846605 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -364,7 +364,7 @@
 
       // However, if the app was part of /system and preopted, there is no original dex file
       // available. In that case grudgingly accept the oat file.
-      if (!DexFile::MaybeDex(dex_location)) {
+      if (!oat_file_assistant.HasOriginalDexFiles()) {
         accept_oat_file = true;
         LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
                      << "Allow oat file use. This is potentially dangerous.";
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 37bb4c1..2489e45 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1921,16 +1921,7 @@
   }
   std::string error_msg;
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
-  if (jit_.get() != nullptr) {
-    jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
-                                     jit_options_->GetWarmupThreshold(),
-                                     jit_options_->GetOsrThreshold(),
-                                     jit_options_->GetPriorityThreadWeight());
-    jit_->CreateThreadPool();
-
-    // Notify native debugger about the classes already loaded before the creation of the jit.
-    jit_->DumpTypeInfoForLoadedTypes(GetClassLinker());
-  } else {
+  if (jit_.get() == nullptr) {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
   }
 }
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index cbd0414..a0987b5 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -790,9 +790,16 @@
         } else if (method_access_flags_ & kAccFinal) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have final methods";
           return false;
-        } else if (!(method_access_flags_ & kAccPublic)) {
-          Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "interfaces may not have non-public members";
-          return false;
+        } else {
+          uint32_t access_flag_options = kAccPublic;
+          if (dex_file_->GetVersion() >= DexFile::kDefaultMethodsVersion) {
+            access_flag_options |= kAccPrivate;
+          }
+          if (!(method_access_flags_ & access_flag_options)) {
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD)
+                << "interfaces may not have protected or package-private members";
+            return false;
+          }
         }
       }
     }
@@ -3794,9 +3801,12 @@
   // Note: this check must be after the initializer check, as those are required to fail a class,
   //       while this check implies an IncompatibleClassChangeError.
   if (klass->IsInterface()) {
-    // methods called on interfaces should be invoke-interface, invoke-super, or invoke-static.
+    // methods called on interfaces should be invoke-interface, invoke-super, invoke-direct (if
+    // dex file version is 37 or greater), or invoke-static.
     if (method_type != METHOD_INTERFACE &&
         method_type != METHOD_STATIC &&
+        ((dex_file_->GetVersion() < DexFile::kDefaultMethodsVersion) ||
+         method_type != METHOD_DIRECT) &&
         method_type != METHOD_SUPER) {
       Fail(VERIFY_ERROR_CLASS_CHANGE)
           << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index d913efe..bbbb0a6 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -19,7 +19,6 @@
 #include <iostream>
 
 #include "jit/jit.h"
-#include "jit/jit_instrumentation.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
@@ -29,7 +28,7 @@
 extern "C" JNIEXPORT void JNICALL Java_IntHolder_waitForCompilation(JNIEnv*, jclass) {
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr) {
-    jit->GetInstrumentationCache()->WaitForCompilationToFinish(Thread::Current());
+    jit->WaitForCompilationToFinish(Thread::Current());
   }
 }
 
diff --git a/test/147-stripped-dex-fallback/expected.txt b/test/147-stripped-dex-fallback/expected.txt
new file mode 100644
index 0000000..af5626b
--- /dev/null
+++ b/test/147-stripped-dex-fallback/expected.txt
@@ -0,0 +1 @@
+Hello, world!
diff --git a/test/147-stripped-dex-fallback/info.txt b/test/147-stripped-dex-fallback/info.txt
new file mode 100644
index 0000000..72a2ca8
--- /dev/null
+++ b/test/147-stripped-dex-fallback/info.txt
@@ -0,0 +1,2 @@
+Verify that we fall back to running out of dex code in the oat file if there is
+no image and the original dex code has been stripped.
diff --git a/test/147-stripped-dex-fallback/run b/test/147-stripped-dex-fallback/run
new file mode 100755
index 0000000..e594010
--- /dev/null
+++ b/test/147-stripped-dex-fallback/run
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ensure flags includes prebuild.
+flags="$@"
+if [[ "${flags}" == *--no-prebuild* ]] ; then
+  echo "Test 147-stripped-dex-fallback is not intended to run in no-prebuild mode."
+  exit 1
+fi
+
+${RUN} ${flags} --strip-dex --no-dex2oat
diff --git a/test/147-stripped-dex-fallback/src/Main.java b/test/147-stripped-dex-fallback/src/Main.java
new file mode 100644
index 0000000..1ef6289
--- /dev/null
+++ b/test/147-stripped-dex-fallback/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+  }
+}
diff --git a/test/594-checker-regression-irreducible-linorder/expected.txt b/test/594-checker-regression-irreducible-linorder/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/594-checker-regression-irreducible-linorder/expected.txt
diff --git a/test/594-checker-regression-irreducible-linorder/info.txt b/test/594-checker-regression-irreducible-linorder/info.txt
new file mode 100644
index 0000000..a1783f8
--- /dev/null
+++ b/test/594-checker-regression-irreducible-linorder/info.txt
@@ -0,0 +1,2 @@
+Regression test for a failing DCHECK in SSA liveness analysis in the presence
+of irreducible loops.
diff --git a/test/594-checker-regression-irreducible-linorder/smali/IrreducibleLoop.smali b/test/594-checker-regression-irreducible-linorder/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..8e01084
--- /dev/null
+++ b/test/594-checker-regression-irreducible-linorder/smali/IrreducibleLoop.smali
@@ -0,0 +1,64 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+.super Ljava/lang/Object;
+
+# Test case where liveness analysis produces linear order where loop blocks are
+# not adjacent.
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) builder (after)
+## CHECK-DAG:     Add loop:none
+## CHECK-DAG:     Mul loop:<<Loop:B\d+>>
+## CHECK-DAG:     Not loop:<<Loop>>
+
+## CHECK-START: int IrreducibleLoop.liveness(boolean, boolean, boolean, int) liveness (after)
+## CHECK-DAG:     Add liveness:<<LPreEntry:\d+>>
+## CHECK-DAG:     Mul liveness:<<LHeader:\d+>>
+## CHECK-DAG:     Not liveness:<<LBackEdge:\d+>>
+## CHECK-EVAL:    (<<LHeader>> < <<LPreEntry>>) and (<<LPreEntry>> < <<LBackEdge>>)
+
+.method public static liveness(ZZZI)I
+   .registers 10
+   const/16 v0, 42
+
+   if-eqz p0, :header
+
+   :pre_entry
+   add-int/2addr p3, p3
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body1
+
+   :header
+   mul-int/2addr p3, p3
+   if-eqz p1, :body2
+
+   :body1
+   goto :body_merge
+
+   :body2
+   invoke-static {v0}, Ljava/lang/System;->exit(I)V
+   goto :body_merge
+
+   :body_merge
+   if-eqz p2, :exit
+
+   :back_edge
+   not-int p3, p3
+   goto :header
+
+   :exit
+   return p3
+
+.end method
diff --git a/test/594-checker-regression-irreducible-linorder/src/Main.java b/test/594-checker-regression-irreducible-linorder/src/Main.java
new file mode 100644
index 0000000..38b2ab4
--- /dev/null
+++ b/test/594-checker-regression-irreducible-linorder/src/Main.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {
+    // Nothing to run. This regression test merely makes sure the smali test
+    // case successfully compiles.
+  }
+}
diff --git a/test/955-lambda-smali/build b/test/955-lambda-smali/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/955-lambda-smali/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@" --experimental default-methods
diff --git a/test/975-iface-private/build b/test/975-iface-private/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/975-iface-private/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@" --experimental default-methods
diff --git a/test/975-iface-private/expected.txt b/test/975-iface-private/expected.txt
new file mode 100644
index 0000000..908a8f2
--- /dev/null
+++ b/test/975-iface-private/expected.txt
@@ -0,0 +1,4 @@
+Saying hi from class
+HELLO!
+Saying hi from interface
+HELLO!
diff --git a/test/975-iface-private/info.txt b/test/975-iface-private/info.txt
new file mode 100644
index 0000000..d5a8d3f
--- /dev/null
+++ b/test/975-iface-private/info.txt
@@ -0,0 +1,5 @@
+Smali-based tests for experimental interface private methods.
+
+This test cannot be run with --jvm.
+
+This test checks that synthetic private methods in interfaces work correctly.
diff --git a/test/975-iface-private/smali/Iface.smali b/test/975-iface-private/smali/Iface.smali
new file mode 100644
index 0000000..a9a44d1
--- /dev/null
+++ b/test/975-iface-private/smali/Iface.smali
@@ -0,0 +1,45 @@
+
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default void sayHi() {
+#     System.out.println(getHiWords());
+#   }
+#
+#   // Synthetic method
+#   private String getHiWords() {
+#     return "HELLO!";
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-direct {p0}, LIface;->getHiWords()Ljava/lang/String;
+    move-result-object v1
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    return-void
+.end method
+
+.method private synthetic getHiWords()Ljava/lang/String;
+    .locals 1
+    const-string v0, "HELLO!"
+    return-object v0
+.end method
diff --git a/test/975-iface-private/smali/Main.smali b/test/975-iface-private/smali/Main.smali
new file mode 100644
index 0000000..dbde203
--- /dev/null
+++ b/test/975-iface-private/smali/Main.smali
@@ -0,0 +1,71 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface {
+#   public static void main(String[] args) {
+#     Main m = new Main();
+#     sayHiMain(m);
+#     sayHiIface(m);
+#   }
+#   public static void sayHiMain(Main m) {
+#     System.out.println("Saying hi from class");
+#     m.sayHi();
+#   }
+#   public static void sayHiIface(Iface m) {
+#     System.out.println("Saying hi from interface");
+#     m.sayHi();
+#   }
+# }
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 2
+    new-instance v0, LMain;
+    invoke-direct {v0}, LMain;-><init>()V
+
+    invoke-static {v0}, LMain;->sayHiMain(LMain;)V
+    invoke-static {v0}, LMain;->sayHiIface(LIface;)V
+
+    return-void
+.end method
+
+.method public static sayHiMain(LMain;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from class"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->sayHi()V
+    return-void
+.end method
+
+.method public static sayHiIface(LIface;)V
+    .locals 2
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v1, "Saying hi from interface"
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->sayHi()V
+    return-void
+.end method
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 1edc599..c5e07de 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -287,6 +287,7 @@
 # 529 and 555: b/27784033
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
   117-nopatchoat \
+  147-stripped-dex-fallback \
   554-jit-profile-file \
   529-checker-unresolved \
   555-checker-regression-x86const
diff --git a/test/etc/default-build b/test/etc/default-build
index 3d84821..962ae38 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -69,10 +69,13 @@
 JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24"
 
+declare -A SMALI_EXPERIMENTAL_ARGS
+SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24"
+
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
     option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
@@ -110,6 +113,7 @@
 # Add args from the experimental mappings.
 for experiment in ${EXPERIMENTAL}; do
   JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+  SMALI_ARGS="${SMALI_ARGS} ${SMALI_EXPERIMENTAL_ARGS[${experiment}]}"
 done
 
 if [ -e classes.dex ]; then
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 28a99de..d61fc8f 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -37,6 +37,7 @@
 PREBUILD="y"
 QUIET="n"
 RELOCATE="y"
+STRIP_DEX="n"
 SECONDARY_DEX=""
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
@@ -118,6 +119,9 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
+    elif [ "x$1" = "x--strip-dex" ]; then
+        STRIP_DEX="y"
+        shift
     elif [ "x$1" = "x--host" ]; then
         HOST="y"
         ANDROID_ROOT="$ANDROID_HOST_OUT"
@@ -380,6 +384,7 @@
 
 dex2oat_cmdline="true"
 mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+strip_cmdline="true"
 
 # Pick a base that will force the app image to get relocated.
 app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
@@ -409,6 +414,10 @@
   fi
 fi
 
+if [ "$STRIP_DEX" = "y" ]; then
+  strip_cmdline="zip --quiet --delete $DEX_LOCATION/$TEST_NAME.jar classes.dex"
+fi
+
 DALVIKVM_ISA_FEATURES_ARGS=""
 if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
   DALVIKVM_ISA_FEATURES_ARGS="-Xcompiler-option --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
@@ -478,6 +487,7 @@
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
              $dex2oat_cmdline && \
+             $strip_cmdline && \
              $dalvikvm_cmdline"
 
     cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
@@ -548,13 +558,7 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      if [ "$PREBUILD" = "y" ]; then
-        echo "$mkdir_cmdline && $dex2oat_cmdline && $cmdline"
-      elif [ "$RELOCATE" = "y" ]; then
-        echo "$mkdir_cmdline && $cmdline"
-      else
-        echo $cmdline
-      fi
+      echo "$mkdir_cmdline && $dex2oat_cmdline && $strip_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
@@ -562,6 +566,7 @@
     rm -rf ${DEX_LOCATION}/dalvik-cache/
     $mkdir_cmdline || exit 1
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
+    $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
 
     # For running, we must turn off logging when dex2oat or patchoat are missing. Otherwise we use
     # the same defaults as for prebuilt: everything when --dev, otherwise errors and above only.
diff --git a/test/run-test b/test/run-test
index 01464cd..fc57d09 100755
--- a/test/run-test
+++ b/test/run-test
@@ -46,7 +46,7 @@
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 export USE_JACK="true"
-export SMALI_ARGS="--experimental --api-level 23"
+export SMALI_ARGS="--experimental"
 
 # If dx was not set by the environment variable, assume it is in the path.
 if [ -z "$DX" ]; then
@@ -190,6 +190,9 @@
         run_args="${run_args} --prebuild"
         prebuild_mode="yes"
         shift;
+    elif [ "x$1" = "x--strip-dex" ]; then
+        run_args="${run_args} --strip-dex"
+        shift;
     elif [ "x$1" = "x--debuggable" ]; then
         run_args="${run_args} -Xcompiler-option --debuggable"
         debuggable="yes"
@@ -449,7 +452,7 @@
     if [ "$target_mode" = "no" ]; then
         framework="${ANDROID_PRODUCT_OUT}/system/framework"
         bpath="${framework}/core-libart.jar:${framework}/core-oj.jar:${framework}/conscrypt.jar:${framework}/okhttp.jar:${framework}/bouncycastle.jar:${framework}/ext.jar"
-        run_args="${run_args} --boot -Xbootclasspath:${bpath}"
+        run_args="${run_args} --boot --runtime-option -Xbootclasspath:${bpath}"
     else
         true # defaults to using target BOOTCLASSPATH
     fi
@@ -571,6 +574,7 @@
         echo "    --prebuild            Run dex2oat on the files before starting test. (default)"
         echo "    --no-prebuild         Do not run dex2oat on the files before starting"
         echo "                          the test."
+        echo "    --strip-dex           Strip the dex files before starting test."
         echo "    --relocate            Force the use of relocating in the test, making"
         echo "                          the image and oat files be relocated to a random"
         echo "                          address before running. (default)"
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 8422e20..354fcef 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -114,9 +114,6 @@
   art_debugee="$art_debugee -verbose:jdwp"
 fi
 
-# Use Jack with "1.8" configuration.
-export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
-
 # Run the tests using vogar.
 vogar $vm_command \
       $vm_args \
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 45fb4b4d..00bb3c5 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -109,7 +109,6 @@
 vogar_args="$vogar_args --timeout 480"
 
 # Use Jack with "1.8" configuration.
-export JACK_VERSION=`basename prebuilts/sdk/tools/jacks/*ALPHA* | sed 's/^jack-//' | sed 's/.jar$//'`
 vogar_args="$vogar_args --toolchain jack --language JN"
 
 # Run the tests using vogar.