Merge "imgdiag: Fix rarely flaky imgdiag_test."
diff --git a/Android.mk b/Android.mk
index fcf70ff..0d0003a 100644
--- a/Android.mk
+++ b/Android.mk
@@ -122,6 +122,16 @@
 include $(art_path)/test/Android.run-test.mk
 include $(art_path)/benchmark/Android.mk
 
+TEST_ART_ADB_ROOT_AND_REMOUNT := \
+    (adb root && \
+     adb wait-for-device remount && \
+     ((adb shell touch /system/testfile && \
+       (adb shell rm /system/testfile || true)) || \
+      (adb disable-verity && \
+       adb reboot && \
+       adb wait-for-device root && \
+       adb wait-for-device remount)))
+
 # Sync test files to the target, depends upon all things that must be pushed to the target.
 .PHONY: test-art-target-sync
 # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty,
@@ -130,12 +140,11 @@
 ifneq ($(ART_TEST_NO_SYNC),true)
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
-	adb root
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT)
 	adb push $(ANDROID_PRODUCT_OUT)/data /data
 endif
@@ -374,8 +383,7 @@
 
 .PHONY: oat-target-sync
 oat-target-sync: oat-target
-	adb root
-	adb wait-for-device remount
+	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb sync
 
 ########################################################################
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index cd9d18d..a93d8a8 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,7 +118,8 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-ART_TARGET_CLANG_mips64 :=
+# b/25928358, illegal instruction on mips64r6 with -O0
+ART_TARGET_CLANG_mips64 := false
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 420db43..edf107e 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -27,6 +27,10 @@
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
 ART_TEST_KNOWN_BROKEN :=
 
+# List of run-tests to skip running in any configuration. This needs to be the full name of the
+# run-test such as '457-regs'.
+ART_TEST_RUN_TEST_SKIP ?=
+
 # Failing valgrind tests.
 # Note: *all* 64b tests involving the runtime do not work currently. b/15170219.
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index ff41736..dcde5ab 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -74,6 +74,7 @@
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
 ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 ART_GTEST_oat_file_test_DEX_DEPS := Main MultiDex
+ART_GTEST_oat_test_DEX_DEPS := Main
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
@@ -187,6 +188,7 @@
   runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/mod_union_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
+  runtime/gc/collector/immune_spaces_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/reference_queue_test.cc \
   runtime/gc/space/dlmalloc_space_base_test.cc \
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index f34b5ed..529143d 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -457,8 +457,10 @@
     EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT);
   }
   {
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * KB), "-Xjitcodecachesize:16K", M::JITCodeCacheCapacity);
-    EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * MB), "-Xjitcodecachesize:16M", M::JITCodeCacheCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity);
+    EXPECT_SINGLE_PARSE_VALUE(
+        MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity);
   }
   {
     EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold);
diff --git a/compiler/Android.mk b/compiler/Android.mk
index e74a68f..0ed843b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -67,9 +67,9 @@
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_utils.cc \
-	optimizing/constant_area_fixups_x86.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
@@ -82,9 +82,11 @@
 	optimizing/load_store_elimination.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
+	optimizing/nodes_arm64.cc \
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
+	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
@@ -219,7 +221,8 @@
   utils/mips/assembler_mips.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \
-  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips)
+  $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \
+  utils/mips64/assembler_mips64.h
 
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 :=
 LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 0940a80..4c66c76 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -20,17 +20,24 @@
 
 namespace art {
 
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    Flush();
+    if (!FlushBuffer()) {
+      return false;
+    }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    bool success = Flush();
-    if (!success) {
+    if (!FlushBuffer()) {
       return false;
     }
   }
@@ -41,6 +48,10 @@
 }
 
 bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
   bool success = true;
   if (used_ > 0) {
     success = out_->WriteFully(&buffer_[0], used_);
@@ -50,7 +61,7 @@
 }
 
 off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
+  if (!FlushBuffer()) {
     return -1;
   }
   return out_->Seek(offset, whence);
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index 15fc033..1da3a69 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
 #define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
 
+#include <memory>
+
 #include "output_stream.h"
 
 #include "globals.h"
@@ -25,26 +27,23 @@
 
 class BufferedOutputStream FINAL : public OutputStream {
  public:
-  explicit BufferedOutputStream(OutputStream* out);
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
 
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
+  ~BufferedOutputStream() OVERRIDE;
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  bool Flush();
+  bool FlushBuffer();
 
-  OutputStream* const out_;
-
+  std::unique_ptr<OutputStream> const out_;
   uint8_t buffer_[kBufferSize];
-
   size_t used_;
 
   DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
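
The two BufferedOutputStream hunks above change the ownership and flushing contract: the wrapper now owns the inner stream via std::unique_ptr, FlushBuffer() drains only the internal 8 KB buffer, and Flush() additionally flushes the wrapped stream. A minimal usage sketch under those assumptions (the `file` handle and payload are illustrative; FileOutputStream is the concrete stream used elsewhere in this patch):

    // Sketch only, not part of the patch.
    std::unique_ptr<OutputStream> file_output(new FileOutputStream(file));
    BufferedOutputStream buffered(std::move(file_output));  // takes ownership
    buffered.WriteFully(payload, payload_size);  // coalesced while <= kBufferSize
    buffered.Flush();                            // FlushBuffer() + out_->Flush()
    // The destructor calls FlushBuffer(); unique_ptr frees the inner stream.
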
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 6fd4575..508b04a 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -48,11 +48,11 @@
     // Pretty-print CFI opcodes.
     constexpr bool is64bit = false;
     dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
-    dwarf::WriteDebugFrameCIE(is64bit, dwarf::DW_EH_PE_absptr, dwarf::Reg(8),
-                              initial_opcodes, kCFIFormat, &debug_frame_data_);
+    dwarf::WriteCIE(is64bit, dwarf::Reg(8),
+                    initial_opcodes, kCFIFormat, &debug_frame_data_);
     std::vector<uintptr_t> debug_frame_patches;
-    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
-                              kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+    dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
+                    kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
     auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index c37ceca..e6cc50c 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -223,6 +223,11 @@
   compiler_kind_ = compiler_kind;
 }
 
+InstructionSet CommonCompilerTest::GetInstructionSet() const {
+  DCHECK(compiler_driver_.get() != nullptr);
+  return compiler_driver_->GetInstructionSet();
+}
+
 void CommonCompilerTest::TearDown() {
   timer_.reset();
   compiler_driver_.reset();
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 67b4428..1b57b7d 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -61,6 +61,8 @@
   Compiler::Kind GetCompilerKind() const;
   void SetCompilerKind(Compiler::Kind compiler_kind);
 
+  InstructionSet GetInstructionSet() const;
+
   // Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetImageClasses();
@@ -115,6 +117,32 @@
     return; \
   }
 
+// TODO: When read barrier works with all compilers in use, get rid of this.
+#define TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK() \
+  if (kUseReadBarrier && GetCompilerKind() == Compiler::kQuick) { \
+    printf("WARNING: TEST DISABLED FOR READ BARRIER WITH QUICK\n"); \
+    return; \
+  }
+
+// TODO: When read barrier works with all Optimizing back ends, get rid of this.
+#define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
+  if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
+    switch (GetInstructionSet()) {                                                        \
+      case kArm64:                                                                        \
+      case kThumb2:                                                                       \
+      case kX86:                                                                          \
+      case kX86_64:                                                                       \
+        /* Instruction set has read barrier support. */                                   \
+        break;                                                                            \
+                                                                                          \
+      default:                                                                            \
+        /* Instruction set does not have read barrier support. */                         \
+        printf("WARNING: TEST DISABLED FOR READ BARRIER WITH OPTIMIZING "                 \
+               "FOR THIS INSTRUCTION SET\n");                                             \
+        return;                                                                           \
+    }                                                                                     \
+  }
+
 // TODO: When non-PIC works with all compilers in use, get rid of this.
 #define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \
   if (GetCompilerKind() == Compiler::kOptimizing) { \
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 8788dc1..3a9ce1b 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -22,6 +22,10 @@
 
 namespace art {
 
+namespace jit {
+  class JitCodeCache;
+}
+
 class ArtMethod;
 class Backend;
 struct CompilationUnit;
@@ -58,6 +62,13 @@
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
+  virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
+                          jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
+                          ArtMethod* method ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return false;
+  }
+
   virtual uintptr_t GetEntryPointOf(ArtMethod* method) const
      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
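
The default JitCompile() lets JIT-capable backends opt in while everything else reports "not supported". A hypothetical override sketch (the class name and helper are invented; only the hook's signature comes from the declaration above):

    // Hypothetical backend, for illustration only.
    class MyJitBackend FINAL : public Compiler {
     public:
      bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
          SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE {
        // Compile `method` and commit the code to `code_cache`; returning
        // false keeps the method on its current (interpreted) entry point.
        return CompileAndCommit(self, code_cache, method);  // hypothetical helper
      }
    };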
 
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 036da2e..b1acf5e 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -447,7 +447,7 @@
 
 static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
   // Emit relative calls anywhere in the image or within a dex file otherwise.
-  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
+  return cu->compiler_driver->IsBootImage() || cu->dex_file == target_method.dex_file;
 }
 
 /*
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2b60a51..5da7214 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1104,7 +1104,11 @@
   // access because the verifier was unable to?
   const DexFile* dex_file = cu_->dex_file;
   CompilerDriver* driver = cu_->compiler_driver;
-  if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx, *dex_file, type_idx)) {
+  bool finalizable;
+  if (driver->CanAccessInstantiableTypeWithoutChecks(cu_->method_idx,
+                                                     *dex_file,
+                                                     type_idx,
+                                                     &finalizable)) {
     bool is_type_initialized;
     bool use_direct_type_ptr;
     uintptr_t direct_type_ptr;
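
The new out-parameter reports whether the resolved type is finalizable; the compiler_driver.cc hunk further below sets it conservatively to true whenever the type cannot be resolved. A hedged sketch of the resulting caller contract (comments are illustrative):

    bool finalizable;
    if (driver->CanAccessInstantiableTypeWithoutChecks(method_idx, dex_file, type_idx,
                                                       &finalizable)) {
      // Resolved, accessible and instantiable; `finalizable` tells the backend
      // whether new-instance needs the entrypoint that registers a finalizer.
    } else {
      // Access checks are still required; `finalizable` is true (conservative).
    }
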
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 6673ea8..05dde9f 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -673,6 +673,12 @@
     return nullptr;
   }
 
+  if (kEmitCompilerReadBarrier) {
+    VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file)
+                   << "  Reason = Quick does not support read barrier.";
+    return nullptr;
+  }
+
   // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use
   // build default.
   CompilerDriver* driver = GetCompilerDriver();
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 8eb37cf..0355f11 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -313,8 +313,9 @@
       concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(
           abstract_method, pointer_size);
     }
-    if (concrete_method == nullptr || concrete_method->IsAbstract()) {
-      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
+    if (concrete_method == nullptr || !concrete_method->IsInvokable()) {
+      // In cases where concrete_method is not found, or is not invokable, continue to the next
+      // invoke.
       continue;
     }
     if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 14ba81d..10841e6 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -329,7 +329,7 @@
       resolved_method->GetMethodIndex() < methods_class->GetVTableLength() &&
       (methods_class->GetVTableEntry(
           resolved_method->GetMethodIndex(), pointer_size) == resolved_method) &&
-      !resolved_method->IsAbstract();
+      resolved_method->IsInvokable();
 
   if (can_sharpen_virtual_based_on_type || can_sharpen_super_based_on_type) {
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
@@ -374,7 +374,7 @@
           class_loader, nullptr, kVirtual);
     }
     CHECK(called_method != nullptr);
-    CHECK(!called_method->IsAbstract());
+    CHECK(called_method->IsInvokable());
     int stats_flags = kFlagMethodResolved;
     GetCodeAndMethodForDirectCall(/*out*/invoke_type,
                                   kDirect,  // Sharp type
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d055b37..9d3af16 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -77,9 +76,6 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 # given, to all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
@@ -341,7 +337,7 @@
                                Compiler::Kind compiler_kind,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures* instruction_set_features,
-                               bool image, std::unordered_set<std::string>* image_classes,
+                               bool boot_image, std::unordered_set<std::string>* image_classes,
                                std::unordered_set<std::string>* compiled_classes,
                                std::unordered_set<std::string>* compiled_methods,
                                size_t thread_count, bool dump_stats, bool dump_passes,
@@ -361,7 +357,7 @@
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
-      image_(image),
+      boot_image_(boot_image),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
@@ -383,7 +379,7 @@
 
   compiler_->Init();
 
-  CHECK_EQ(image_, image_classes_.get() != nullptr);
+  CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
@@ -559,7 +555,7 @@
     }
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
-  } else if (Runtime::Current()->IsAotCompiler()) {
+  } else {
     const VerifiedMethod* verified_method =
         driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
     bool compile = compilation_enabled &&
@@ -598,13 +594,6 @@
               ? dex_to_dex_compilation_level
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
-  } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
-                                                     class_def_idx, method_idx, class_loader,
-                                                     dex_file, dex_cache);
   }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -696,42 +685,6 @@
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) {
-  DCHECK_EQ(method,
-            method->GetInterfaceMethodIfProxy(
-                Runtime::Current()->GetClassLinker()->GetImagePointerSize()));
-  const uint32_t method_idx = method->GetDexMethodIndex();
-  const uint32_t access_flags = method->GetAccessFlags();
-  const InvokeType invoke_type = method->GetInvokeType();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      method->GetDeclaringClass()->GetClassLoader()));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-  jobject jclass_loader = class_loader.ToJObject();
-  const DexFile* dex_file = method->GetDexFile();
-  const uint16_t class_def_idx = method->GetClassDefIndex();
-  const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-  optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
-      GetDexToDexCompilationLevel(self, *this, class_loader, *dex_file, class_def);
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  // Go to native so that we don't block GC during compilation.
-  ScopedThreadSuspension sts(self, kNative);
-  CompileMethod(self,
-                this,
-                code_item,
-                access_flags,
-                invoke_type,
-                class_def_idx,
-                method_idx,
-                jclass_loader,
-                *dex_file,
-                dex_to_dex_compilation_level,
-                true,
-                dex_cache);
-  auto* compiled_method = GetCompiledMethod(MethodReference(dex_file, method_idx));
-  return compiled_method;
-}
-
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
@@ -781,7 +734,7 @@
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // NOTE: Currently unreachable, all callers check IsBootImage().
     return false;
   } else {
@@ -790,7 +743,7 @@
 }
 
 bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -801,7 +754,7 @@
 }
 
 bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -889,7 +842,7 @@
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
   CHECK(timings != nullptr);
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     return;
   }
 
@@ -1118,7 +1071,7 @@
 };
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
-  if (IsImage()) {
+  if (IsBootImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
 
     Runtime* runtime = Runtime::Current();
@@ -1145,7 +1098,7 @@
     // Having the klass reference here implies that the klass is already loaded.
     return true;
   }
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // Assume loaded only if klass is in the boot image. App classes cannot be assumed
     // loaded because we don't even know what class loader will be used to load them.
     bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace();
@@ -1157,25 +1110,23 @@
 }
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsImage() &&
-      IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
-          soa.Self(), dex_file, false);
-      mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-      if (resolved_class == nullptr) {
-        // Erroneous class.
-        stats_->TypeNotInDexCache();
-        return false;
-      }
-    }
+  bool result = false;
+  if ((IsBootImage() &&
+       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) ||
+      Runtime::Current()->UseJit()) {
+    ScopedObjectAccess soa(Thread::Current());
+    mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
+        soa.Self(), dex_file, false);
+    mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+    result = (resolved_class != nullptr);
+  }
+
+  if (result) {
     stats_->TypeInDexCache();
-    return true;
   } else {
     stats_->TypeNotInDexCache();
-    return false;
   }
+  return result;
 }
 
 bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
@@ -1183,7 +1134,7 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
@@ -1251,7 +1202,8 @@
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
                                                             const DexFile& dex_file,
-                                                            uint32_t type_idx) {
+                                                            uint32_t type_idx,
+                                                            bool* finalizable) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(
       soa.Self(), dex_file, false);
@@ -1259,8 +1211,11 @@
   mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == nullptr) {
     stats_->TypeNeedsAccessCheck();
+    // Be conservative.
+    *finalizable = true;
     return false;  // Unknown class needs access checks.
   }
+  *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
   mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
   if (referrer_class == nullptr) {
@@ -1300,7 +1255,7 @@
   if (compiling_boot) {
     // boot -> boot class pointers.
     // True if the class is in the image at boot compiling time.
-    const bool is_image_class = IsImage() && IsImageClass(
+    const bool is_image_class = IsBootImage() && IsImageClass(
         dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
     // True if pc relative load works.
     if (is_image_class && support_boot_image_fixup) {
@@ -1548,7 +1503,7 @@
   }
   if (!use_dex_cache && force_relocations) {
     bool is_in_image;
-    if (IsImage()) {
+    if (IsBootImage()) {
       is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
     } else {
       is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
@@ -1595,7 +1550,7 @@
       *type = sharp_type;
     }
   } else {
-    auto* image_space = heap->GetImageSpace();
+    auto* image_space = heap->GetBootImageSpace();
     bool method_in_image = false;
     if (image_space != nullptr) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
@@ -2019,7 +1974,7 @@
 
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  if (IsImage()) {
+  if (IsBootImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
@@ -2077,8 +2032,14 @@
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
           soa.Self(), dex_file, false)));
       std::string error_msg;
-      if (verifier::MethodVerifier::VerifyClass(soa.Self(), &dex_file, dex_cache, class_loader,
-                                                &class_def, true, &error_msg) ==
+      if (verifier::MethodVerifier::VerifyClass(soa.Self(),
+                                                &dex_file,
+                                                dex_cache,
+                                                class_loader,
+                                                &class_def,
+                                                true /* allow soft failures */,
+                                                true /* log hard failures */,
+                                                &error_msg) ==
                                                     verifier::MethodVerifier::kHardFailure) {
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                    << " because: " << error_msg;
@@ -2101,8 +2062,8 @@
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
-      DCHECK(!manager_->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class "
-          << PrettyClass(klass.Get()) << " failed to fully verify.";
+      DCHECK(!manager_->GetCompiler()->IsBootImage() || klass->IsVerified())
+          << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify.";
     }
     soa.Self()->AssertNoPendingException();
   }
@@ -2222,7 +2183,7 @@
           if (!klass->IsInitialized()) {
             // We need to initialize static fields, we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            bool can_init_static_fields = manager_->GetCompiler()->IsImage() &&
+            bool can_init_static_fields = manager_->GetCompiler()->IsBootImage() &&
                 manager_->GetCompiler()->IsImageClass(descriptor) &&
                 !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
             if (can_init_static_fields) {
@@ -2286,7 +2247,7 @@
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   size_t thread_count;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
     thread_count = 1U;
   } else {
@@ -2304,7 +2265,7 @@
     CHECK(dex_file != nullptr);
     InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
-  if (IsImage()) {
+  if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
     Runtime::Current()->GetHeap()->CollectGarbage(true);
   }
@@ -2549,19 +2510,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_present_) {
     return false;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 4ed4dc6..1347b37 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -59,7 +59,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -92,7 +91,7 @@
                  Compiler::Kind compiler_kind,
                  InstructionSet instruction_set,
                  const InstructionSetFeatures* instruction_set_features,
-                 bool image, std::unordered_set<std::string>* image_classes,
+                 bool boot_image, std::unordered_set<std::string>* image_classes,
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
@@ -119,9 +118,6 @@
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
-  CompiledMethod* CompileArtMethod(Thread* self, ArtMethod*)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!compiled_methods_lock_) WARN_UNUSED;
-
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -156,8 +152,8 @@
   }
 
   // Are we compiling and creating an image file?
-  bool IsImage() const {
-    return image_;
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   const std::unordered_set<std::string>* GetImageClasses() const {
@@ -214,8 +210,11 @@
       REQUIRES(!Locks::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
-  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
-                                              uint32_t type_idx)
+  // out_is_finalizable is set to whether the type is finalizable.
+  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
+                                              const DexFile& dex_file,
+                                              uint32_t type_idx,
+                                              bool* out_is_finalizable)
       REQUIRES(!Locks::mutator_lock_);
 
   bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
@@ -398,12 +397,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
@@ -482,6 +475,10 @@
     return &compiled_method_storage_;
   }
 
+  // Can we assume that the klass is loaded?
+  bool CanAssumeClassIsLoaded(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -516,10 +513,6 @@
   bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Can we assume that the klass is loaded?
-  bool CanAssumeClassIsLoaded(mirror::Class* klass)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
   enum {
@@ -637,7 +630,7 @@
   // in the .oat_patches ELF section if requested in the compiler options.
   size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
 
-  const bool image_;
+  const bool boot_image_;
 
   // If boot_image_ is true, specifies the classes that will be included in
   // the image. Note if image_classes_ is null, all classes are
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 1107599..f8de9fa 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -147,6 +147,8 @@
 
 TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -193,6 +195,8 @@
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
   {
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
index 60241f7..5a99641 100644
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -282,7 +282,12 @@
 
   bool IsEnabled() const { return enabled_; }
 
-  void SetEnabled(bool value) { enabled_ = value; }
+  void SetEnabled(bool value) {
+    enabled_ = value;
+    if (enabled_ && opcodes_.capacity() == 0u) {
+      opcodes_.reserve(kDefaultCapacity);
+    }
+  }
 
   int GetCurrentPC() const { return current_pc_; }
 
@@ -292,24 +297,24 @@
 
   using Writer<Vector>::data;
 
-  DebugFrameOpCodeWriter(bool enabled = true,
-                         const typename Vector::allocator_type& alloc =
-                             typename Vector::allocator_type())
+  explicit DebugFrameOpCodeWriter(bool enabled = true,
+                                  const typename Vector::allocator_type& alloc =
+                                      typename Vector::allocator_type())
       : Writer<Vector>(&opcodes_),
-        enabled_(enabled),
+        enabled_(false),
         opcodes_(alloc),
         current_cfa_offset_(0),
         current_pc_(0),
         uses_dwarf3_features_(false) {
-    if (enabled) {
-      // Best guess based on couple of observed outputs.
-      opcodes_.reserve(16);
-    }
+    SetEnabled(enabled);
   }
 
   virtual ~DebugFrameOpCodeWriter() { }
 
  protected:
+  // Best guess based on a couple of observed outputs.
+  static constexpr size_t kDefaultCapacity = 32u;
+
   int FactorDataOffset(int offset) const {
     DCHECK_EQ(offset % kDataAlignmentFactor, 0);
     return offset / kDataAlignmentFactor;
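
Since the constructor now routes through SetEnabled(), the one-time capacity reservation happens on the first enable regardless of how the writer starts out. A small sketch using only the members visible above:

    DebugFrameOpCodeWriter<> cfi(/* enabled */ false);  // no reservation yet
    cfi.SetEnabled(true);  // first enable reserves kDefaultCapacity (32 bytes)
    DCHECK(cfi.IsEnabled());
    DCHECK_EQ(cfi.GetCurrentPC(), 0);
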
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index d9b367b..a551e4b 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <unordered_map>
 
+#include "base/casts.h"
 #include "dwarf/dwarf_constants.h"
 #include "dwarf/writer.h"
 #include "leb128.h"
@@ -47,9 +48,9 @@
  * It also handles generation of abbreviations.
  *
  * Usage:
- *   StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ *   StartTag(DW_TAG_compile_unit);
  *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
- *     StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ *     StartTag(DW_TAG_subprogram);
  *       WriteStrp(DW_AT_name, "Foo", debug_str);
  *     EndTag();
  *   EndTag();
@@ -59,36 +60,40 @@
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
  public:
+  static constexpr size_t kCompilationUnitHeaderSize = 11;
+
   // Start debugging information entry.
-  void StartTag(Tag tag, Children children) {
-    DCHECK(has_children) << "This tag can not have nested tags";
+  // Returns the offset of the entry within the compilation unit.
+  size_t StartTag(Tag tag) {
     if (inside_entry_) {
       // Write abbrev code for the previous entry.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Parent entry is finalized before any children are written.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
       inside_entry_ = false;
     }
-    StartAbbrev(tag, children);
+    StartAbbrev(tag);
     // Abbrev code placeholder of sufficient size.
     abbrev_code_offset_ = this->data()->size();
     this->PushUleb128(NextAbbrevCode());
     depth_++;
     inside_entry_ = true;
-    has_children = (children == DW_CHILDREN_yes);
+    return abbrev_code_offset_ + kCompilationUnitHeaderSize;
   }
 
   // End debugging information entry.
   void EndTag() {
     DCHECK_GT(depth_, 0);
     if (inside_entry_) {
-      // Write abbrev code for this tag.
-      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      // Write abbrev code for this entry.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
       inside_entry_ = false;
-    }
-    if (has_children) {
-      this->PushUint8(0);  // End of children.
+      // This entry has no children and so there is no terminator.
+    } else {
+      // The entry has already been finalized, so it must be a parent entry
+      // and we need to write the terminator required by DW_CHILDREN_yes.
+      this->PushUint8(0);
     }
     depth_--;
-    has_children = true;  // Parent tag obviously has children.
   }
 
   void WriteAddr(Attribute attrib, uint64_t value) {
@@ -101,10 +106,16 @@
     }
   }
 
-  void WriteBlock(Attribute attrib, const void* ptr, int size) {
+  void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) {
     AddAbbrevAttribute(attrib, DW_FORM_block);
-    this->PushUleb128(size);
-    this->PushData(ptr, size);
+    this->PushUleb128(num_bytes);
+    this->PushData(ptr, num_bytes);
+  }
+
+  void WriteExprLoc(Attribute attrib, const void* ptr, size_t num_bytes) {
+    AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    this->PushUleb128(dchecked_integral_cast<uint32_t>(num_bytes));
+    this->PushData(ptr, num_bytes);
   }
 
   void WriteData1(Attribute attrib, uint8_t value) {
@@ -127,6 +138,11 @@
     this->PushUint64(value);
   }
 
+  void WriteSecOffset(Attribute attrib, uint32_t offset) {
+    AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    this->PushUint32(offset);
+  }
+
   void WriteSdata(Attribute attrib, int value) {
     AddAbbrevAttribute(attrib, DW_FORM_sdata);
     this->PushSleb128(value);
@@ -147,12 +163,12 @@
     this->PushUint8(value ? 1 : 0);
   }
 
-  void WriteRef4(Attribute attrib, int cu_offset) {
+  void WriteRef4(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref4);
     this->PushUint32(cu_offset);
   }
 
-  void WriteRef(Attribute attrib, int cu_offset) {
+  void WriteRef(Attribute attrib, uint32_t cu_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
     this->PushUleb128(cu_offset);
   }
@@ -162,16 +178,21 @@
     this->PushString(value);
   }
 
-  void WriteStrp(Attribute attrib, int address) {
+  void WriteStrp(Attribute attrib, size_t debug_str_offset) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    this->PushUint32(address);
+    this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
   }
 
-  void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+  void WriteStrp(Attribute attrib, const char* str, size_t len,
+                 std::vector<uint8_t>* debug_str) {
     AddAbbrevAttribute(attrib, DW_FORM_strp);
-    int address = debug_str->size();
-    debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
-    this->PushUint32(address);
+    this->PushUint32(debug_str->size());
+    debug_str->insert(debug_str->end(), str, str + len);
+    debug_str->push_back(0);
+  }
+
+  void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) {
+    WriteStrp(attrib, str, strlen(str), debug_str);
   }
 
   bool Is64bit() const { return is64bit_; }
@@ -180,7 +201,11 @@
     return patch_locations_;
   }
 
+  int Depth() const { return depth_; }
+
   using Writer<Vector>::data;
+  using Writer<Vector>::size;
+  using Writer<Vector>::UpdateUint32;
 
   DebugInfoEntryWriter(bool is64bitArch,
                        Vector* debug_abbrev,
@@ -196,16 +221,17 @@
   }
 
   ~DebugInfoEntryWriter() {
+    DCHECK(!inside_entry_);
     DCHECK_EQ(depth_, 0);
   }
 
  private:
   // Start abbreviation declaration.
-  void StartAbbrev(Tag tag, Children children) {
-    DCHECK(!inside_entry_);
+  void StartAbbrev(Tag tag) {
     current_abbrev_.clear();
     EncodeUnsignedLeb128(&current_abbrev_, tag);
-    current_abbrev_.push_back(children);
+    has_children_offset_ = current_abbrev_.size();
+    current_abbrev_.push_back(0);  // Place-holder for DW_CHILDREN.
   }
 
   // Add attribute specification.
@@ -220,8 +246,9 @@
   }
 
   // End abbreviation declaration and return its code.
-  int EndAbbrev() {
-    DCHECK(inside_entry_);
+  int EndAbbrev(Children has_children) {
+    DCHECK(!current_abbrev_.empty());
+    current_abbrev_[has_children_offset_] = has_children;
     auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
                                                   NextAbbrevCode()));
     int abbrev_code = it.first->second;
@@ -241,6 +268,7 @@
   // Fields for writing and deduplication of abbrevs.
   Writer<Vector> debug_abbrev_;
   Vector current_abbrev_;
+  size_t has_children_offset_ = 0;
   std::unordered_map<Vector, int,
                      FNVHash<Vector> > abbrev_codes_;
 
@@ -250,7 +278,6 @@
   int depth_ = 0;
   size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
   bool inside_entry_ = false;  // Entry ends at first child (if any).
-  bool has_children = true;
   std::vector<uintptr_t> patch_locations_;
 };
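
With the children flag now inferred, callers no longer pass DW_CHILDREN_*: an entry's abbreviation is finalized as DW_CHILDREN_yes the moment a nested StartTag() arrives, or as DW_CHILDREN_no at its own EndTag(). A sketch of the resulting call sequence (the backing vector is illustrative; tags are the ones used in dwarf_test.cc below):

    std::vector<uint8_t> abbrevs;  // illustrative abbrev backing store
    DebugInfoEntryWriter<> info(/* is64bitArch */ false, &abbrevs);
    info.StartTag(dwarf::DW_TAG_compile_unit);  // children still unknown here
    info.StartTag(dwarf::DW_TAG_subprogram);    // finalizes parent as DW_CHILDREN_yes
    info.EndTag();                              // leaf: abbrev gets DW_CHILDREN_no
    info.EndTag();                              // parent finalized earlier; emits 0 terminator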
 
diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
new file mode 100644
index 0000000..7fb21b7
--- /dev/null
+++ b/compiler/dwarf/dedup_vector.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+
+#include <cstdint>
+#include <cstring>
+#include <unordered_map>
+#include <vector>
+
+namespace art {
+namespace dwarf {
+  class DedupVector {
+   public:
+    // Returns an offset to previously inserted identical block of data,
+    // or appends the data at the end of the vector and returns offset to it.
+    size_t Insert(const uint8_t* ptr, size_t num_bytes) {
+      // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+      uint32_t hash = 2166136261u;
+      for (size_t i = 0; i < num_bytes; i++) {
+        hash = (hash ^ ptr[i]) * 16777619u;
+      }
+      // Try to find existing copy of the data.
+      const auto& range = hash_to_offset_.equal_range(hash);
+      for (auto it = range.first; it != range.second; ++it) {
+        const size_t offset = it->second;
+        if (offset + num_bytes <= vector_.size() &&
+            memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
+          return offset;
+        }
+      }
+      // Append the data at the end of the vector.
+      const size_t new_offset = vector_.size();
+      hash_to_offset_.emplace(hash, new_offset);
+      vector_.insert(vector_.end(), ptr, ptr + num_bytes);
+      return new_offset;
+    }
+
+    const std::vector<uint8_t>& Data() const { return vector_; }
+
+   private:
+    struct IdentityHash {
+      size_t operator()(uint32_t v) const { return v; }
+    };
+
+    // We store the full hash as the key to simplify growing of the table.
+    // It avoids storing or referencing the actual data in the hash-table.
+    std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
+
+    std::vector<uint8_t> vector_;
+  };
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEDUP_VECTOR_H_
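
A minimal usage sketch of the new helper (the byte values and CHECK macros are illustrative): two inserts with identical content return the same offset, and the bytes are stored only once.

    art::dwarf::DedupVector pool;
    const uint8_t cfi_a[] = {0x0c, 0x04, 0x04};
    const uint8_t cfi_b[] = {0x0c, 0x04, 0x04};        // identical bytes
    size_t off_a = pool.Insert(cfi_a, sizeof(cfi_a));  // appended at offset 0
    size_t off_b = pool.Insert(cfi_b, sizeof(cfi_b));  // found via FNV-1a hash
    CHECK_EQ(off_a, off_b);                            // deduplicated
    CHECK_EQ(pool.Data().size(), sizeof(cfi_a));       // stored once
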
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 3ba380e..e9cd421 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -122,12 +122,12 @@
   DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
 
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(is64bit ? 16 : 8),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(is64bit ? 16 : 8),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
   CheckObjdumpOutput(is64bit, "-W");
@@ -136,14 +136,14 @@
 TEST_F(DwarfTest, DebugFrame64) {
   constexpr bool is64bit = true;
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   DebugFrameOpCodeWriter<> opcodes;
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
   DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
@@ -176,12 +176,12 @@
   DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)");
   DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)");
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   CheckObjdumpOutput(is64bit, "-W");
 }
@@ -237,7 +237,7 @@
   std::vector<uintptr_t> debug_line_patches;
   std::vector<uintptr_t> expected_patches { 87 };  // NOLINT
   WriteDebugLineTable(include_directories, files, opcodes,
-                      &debug_line_data_, &debug_line_patches);
+                      0, &debug_line_data_, &debug_line_patches);
 
   EXPECT_EQ(expected_patches, debug_line_patches);
   CheckObjdumpOutput(is64bit, "-W");
@@ -276,7 +276,7 @@
   std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } };  // NOLINT
   std::vector<uintptr_t> debug_line_patches;
   WriteDebugLineTable(directories, files, opcodes,
-                      &debug_line_data_, &debug_line_patches);
+                      0, &debug_line_data_, &debug_line_patches);
 
   CheckObjdumpOutput(is64bit, "-W -WL");
 }
@@ -285,7 +285,7 @@
   constexpr bool is64bit = false;
   DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
   DW_CHECK("Contents of the .debug_info section:");
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
   info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
@@ -293,7 +293,7 @@
   DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
@@ -302,7 +302,7 @@
   info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
   DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
   info.EndTag();  // DW_TAG_subprogram
-  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  info.StartTag(dwarf::DW_TAG_subprogram);
   DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
   info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
   DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
@@ -313,7 +313,7 @@
   info.EndTag();  // DW_TAG_subprogram
   info.EndTag();  // DW_TAG_compile_unit
   // Test that the previous list was properly terminated and that empty children work.
-  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.StartTag(dwarf::DW_TAG_compile_unit);
   info.EndTag();  // DW_TAG_compile_unit
 
   // The abbrev table is just side product, but check it as well.
@@ -327,12 +327,12 @@
   DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
   DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
   DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
-  DW_CHECK("3      DW_TAG_compile_unit    [has children]");
+  DW_CHECK("3      DW_TAG_compile_unit    [no children]");
 
   std::vector<uintptr_t> debug_info_patches;
   std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 };  // NOLINT
   dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info,
-                          &debug_info_data_, &debug_info_patches);
+                          0, &debug_info_data_, &debug_info_patches);
 
   EXPECT_EQ(expected_patches, debug_info_patches);
   CheckObjdumpOutput(is64bit, "-W");
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index f819c49..5464ed9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -59,38 +59,27 @@
   std::vector<std::string> Objdump(const char* args) {
     // Write simple elf file with just the DWARF sections.
     InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
-    class NoCode : public CodeOutput {
-      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
-    } no_code;
-    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
-    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
-    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_frame(".debug_frame", SHT_PROGBITS, 0, nullptr, 0, 8, 0);
+    ScratchFile file;
+    FileOutputStream output_stream(file.GetFile());
+    ElfBuilder<ElfTypes> builder(isa, &output_stream);
+    builder.Start();
     if (!debug_info_data_.empty()) {
-      debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterSection(&debug_info);
+      builder.WriteSection(".debug_info", &debug_info_data_);
     }
     if (!debug_abbrev_data_.empty()) {
-      debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterSection(&debug_abbrev);
+      builder.WriteSection(".debug_abbrev", &debug_abbrev_data_);
     }
     if (!debug_str_data_.empty()) {
-      debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterSection(&debug_str);
+      builder.WriteSection(".debug_str", &debug_str_data_);
     }
     if (!debug_line_data_.empty()) {
-      debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterSection(&debug_line);
+      builder.WriteSection(".debug_line", &debug_line_data_);
     }
     if (!debug_frame_data_.empty()) {
-      debug_frame.SetBuffer(debug_frame_data_);
-      builder.RegisterSection(&debug_frame);
+      builder.WriteSection(".debug_frame", &debug_frame_data_);
     }
-    ScratchFile file;
-    builder.Write(file.GetFile());
+    builder.End();
+    EXPECT_TRUE(builder.Good());
 
     // Read the elf file back using objdump.
     std::vector<std::string> lines;
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index f3fba4b..f76f76f 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -38,15 +38,14 @@
 
 // Write common information entry (CIE) to .debug_frame or .eh_frame section.
 template<typename Vector>
-void WriteDebugFrameCIE(bool is64bit,
-                        ExceptionHeaderValueApplication address_type,
-                        Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Vector>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame) {
+void WriteCIE(bool is64bit,
+              Reg return_address_register,
+              const DebugFrameOpCodeWriter<Vector>& opcodes,
+              CFIFormat format,
+              std::vector<uint8_t>* buffer) {
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
-  Writer<> writer(debug_frame);
+  Writer<> writer(buffer);
   size_t cie_header_start_ = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
@@ -57,17 +56,17 @@
   writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
   writer.PushUleb128(1);  // z: Augmentation data size.
   if (is64bit) {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8);   // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8);  // R: Pointer encoding.
     }
   } else {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);   // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
     }
   }
@@ -78,30 +77,44 @@
 
 // Write frame description entry (FDE) to .debug_frame or .eh_frame section.
 inline
-void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
-                        uint64_t initial_address, uint64_t address_range,
-                        const ArrayRef<const uint8_t>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame,
-                        std::vector<uintptr_t>* debug_frame_patches) {
-  Writer<> writer(debug_frame);
+void WriteFDE(bool is64bit,
+              uint64_t section_address,  // Absolute address of the section.
+              uint64_t cie_address,  // Absolute address of last CIE.
+              uint64_t code_address,
+              uint64_t code_size,
+              const ArrayRef<const uint8_t>& opcodes,
+              CFIFormat format,
+              uint64_t buffer_address,  // Address of buffer in linked application.
+              std::vector<uint8_t>* buffer,
+              std::vector<uintptr_t>* patch_locations) {
+  CHECK_GE(cie_address, section_address);
+  CHECK_GE(buffer_address, section_address);
+
+  Writer<> writer(buffer);
   size_t fde_header_start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   if (format == DW_EH_FRAME_FORMAT) {
-    uint32_t cie_pointer = writer.data()->size() - cie_offset;
+    uint32_t cie_pointer = (buffer_address + buffer->size()) - cie_address;
     writer.PushUint32(cie_pointer);
   } else {
-    uint32_t cie_pointer = cie_offset;
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    uint32_t cie_pointer = cie_address - section_address;
     writer.PushUint32(cie_pointer);
   }
-  // Relocate initial_address, but not address_range (it is a size).
-  debug_frame_patches->push_back(writer.data()->size());
-  if (is64bit) {
-    writer.PushUint64(initial_address);
-    writer.PushUint64(address_range);
+  if (format == DW_EH_FRAME_FORMAT) {
+    // .eh_frame encodes the location as relative address.
+    code_address -= buffer_address + buffer->size();
   } else {
-    writer.PushUint32(initial_address);
-    writer.PushUint32(address_range);
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    // Relocate code_address if it has an absolute value.
+    patch_locations->push_back(buffer_address + buffer->size() - section_address);
+  }
+  if (is64bit) {
+    writer.PushUint64(code_address);
+    writer.PushUint64(code_size);
+  } else {
+    writer.PushUint32(code_address);
+    writer.PushUint32(code_size);
   }
   writer.PushUleb128(0);  // Augmentation data size.
   writer.PushData(opcodes);
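To make the two CIE-pointer conventions concrete, a worked example with made-up addresses (all numbers hypothetical):

    // .debug_frame: the CIE pointer is an offset from the start of the section.
    //   section_address = 0x1000, cie_address = 0x1040
    //   cie_pointer = 0x1040 - 0x1000 = 0x40
    // .eh_frame: the CIE pointer counts bytes back from the field being written,
    // which sits right after the 4-byte length placeholder.
    //   buffer_address = 0x1100, buffer->size() = 4, cie_address = 0x1040
    //   cie_pointer = (0x1100 + 4) - 0x1040 = 0xC4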
@@ -113,6 +126,7 @@
 template<typename Vector>
 void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
                       const DebugInfoEntryWriter<Vector>& entries,
+                      size_t debug_info_offset,  // offset from start of .debug_info.
                       std::vector<uint8_t>* debug_info,
                       std::vector<uintptr_t>* debug_info_patches) {
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
@@ -120,15 +134,16 @@
   Writer<> writer(debug_info);
   size_t start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
-  writer.PushUint16(3);  // Version.
+  writer.PushUint16(4);  // Version.
   writer.PushUint32(debug_abbrev_offset);
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
+  DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
   writer.PushData(*entries.data());
   writer.UpdateUint32(start, writer.data()->size() - start - 4);
   // Copy patch locations and make them relative to .debug_info section.
   for (uintptr_t patch_location : entries.GetPatchLocations()) {
-    debug_info_patches->push_back(entries_offset + patch_location);
+    debug_info_patches->push_back(debug_info_offset + entries_offset + patch_location);
   }
 }
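The new debug_info_offset parameter matters when several compilation units are concatenated into a single .debug_info section; the patch locations must stay section-relative rather than CU-relative. A hedged sketch of the intended call pattern (cu1, cu2 and the abbrev offsets are illustrative):

    std::vector<uint8_t> debug_info;
    std::vector<uintptr_t> patches;
    // First CU starts at offset 0 within .debug_info.
    dwarf::WriteDebugInfoCU(abbrev_offset1, cu1, 0, &debug_info, &patches);
    // Second CU: pass the current size so its patch locations are
    // relative to the section, not to this CU.
    dwarf::WriteDebugInfoCU(abbrev_offset2, cu2, debug_info.size(),
                            &debug_info, &patches);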
 
@@ -144,6 +159,7 @@
 void WriteDebugLineTable(const std::vector<std::string>& include_directories,
                          const std::vector<FileEntry>& files,
                          const DebugLineOpCodeWriter<Vector>& opcodes,
+                         size_t debug_line_offset,  // offset from start of .debug_line.
                          std::vector<uint8_t>* debug_line,
                          std::vector<uintptr_t>* debug_line_patches) {
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
@@ -151,10 +167,7 @@
   Writer<> writer(debug_line);
   size_t header_start = writer.data()->size();
   writer.PushUint32(0);  // Section-length placeholder.
-  // Claim DWARF-2 version even though we use some DWARF-3 features.
-  // DWARF-2 consumers will ignore the unknown opcodes.
-  // This is what clang currently does.
-  writer.PushUint16(2);  // .debug_line version.
+  writer.PushUint16(3);  // .debug_line version.
   size_t header_length_pos = writer.data()->size();
   writer.PushUint32(0);  // Header-length placeholder.
   writer.PushUint8(1 << opcodes.GetCodeFactorBits());
@@ -184,7 +197,7 @@
   writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
   // Copy patch locations and make them relative to .debug_line section.
   for (uintptr_t patch_location : opcodes.GetPatchLocations()) {
-    debug_line_patches->push_back(opcodes_offset + patch_location);
+    debug_line_patches->push_back(debug_line_offset + opcodes_offset + patch_location);
   }
 }
 
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
new file mode 100644
index 0000000..a391e4d
--- /dev/null
+++ b/compiler/dwarf/method_debug_info.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+
+#include "dex_file.h"
+
+namespace art {
+class CompiledMethod;
+namespace dwarf {
+
+struct MethodDebugInfo {
+  const DexFile* dex_file_;
+  size_t class_def_index_;
+  uint32_t dex_method_index_;
+  uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+  bool deduped_;
+  uint32_t low_pc_;
+  uint32_t high_pc_;
+  CompiledMethod* compiled_method_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
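A hypothetical example of filling in the new struct for one compiled method; it is a plain aggregate, so there is no constructor to call, and every value below is illustrative:

    dwarf::MethodDebugInfo info;
    info.dex_file_ = dex_file;            // source dex file, assumed in scope.
    info.class_def_index_ = class_def_index;
    info.dex_method_index_ = method_idx;
    info.access_flags_ = access_flags;
    info.code_item_ = code_item;
    info.deduped_ = false;                // this method does not share its code.
    info.low_pc_ = code_offset;           // method start, relative to .text.
    info.high_pc_ = code_offset + code_size;
    info.compiled_method_ = compiled_method;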
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index 7045237..b67e8dd 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -35,9 +35,10 @@
   //   Arm64 mapping is correct since we already do this there.
   //   libunwind might struggle with the new mapping as well.
 
-  static Reg ArmCore(int num) { return Reg(num); }
+  static Reg ArmCore(int num) { return Reg(num); }  // R0-R15.
   static Reg ArmFp(int num) { return Reg(64 + num); }  // S0-S31.
-  static Reg Arm64Core(int num) { return Reg(num); }
+  static Reg ArmDp(int num) { return Reg(256 + num); }  // D0-D31.
+  static Reg Arm64Core(int num) { return Reg(num); }  // X0-X31.
   static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
   static Reg MipsCore(int num) { return Reg(num); }
   static Reg Mips64Core(int num) { return Reg(num); }
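Spelled out, the numbering that these constructors imply (values derived directly from the expressions above; num() is the accessor the CFI writers use):

    dwarf::Reg r0 = dwarf::Reg::ArmCore(0);  // r0.num() == 0
    dwarf::Reg s1 = dwarf::Reg::ArmFp(1);    // s1.num() == 65  (64 + 1)
    dwarf::Reg d8 = dwarf::Reg::ArmDp(8);    // d8.num() == 264 (256 + 8)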
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index 00b9dfa..d2add7f 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -114,9 +114,9 @@
     data_->insert(data_->end(), value, value + strlen(value) + 1);
   }
 
-  void PushData(const void* ptr, size_t size) {
+  void PushData(const void* ptr, size_t num_bytes) {
     const char* p = reinterpret_cast<const char*>(ptr);
-    data_->insert(data_->end(), p, p + size);
+    data_->insert(data_->end(), p, p + num_bytes);
   }
 
   template<typename Vector2>
@@ -164,6 +164,10 @@
     return data_;
   }
 
+  size_t size() const {
+    return data_->size();
+  }
+
   explicit Writer(Vector* buffer) : data_(buffer) { }
 
  private:
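A small sketch of the length-placeholder idiom that the renamed PushData and the new size() accessor support together (the buffer and its contents are illustrative):

    std::vector<uint8_t> buf;
    dwarf::Writer<> writer(&buf);
    size_t start = writer.size();
    writer.PushUint32(0);                       // length placeholder.
    writer.PushData("abc", 3 /* num_bytes */);  // raw bytes, no trailing NUL.
    writer.UpdateUint32(start, writer.size() - start - 4);  // patch it in.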
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bbd962f..c19bc3d 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -21,27 +21,59 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/casts.h"
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
+#include "leb128.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CodeOutput {
- public:
-  virtual bool Write(OutputStream* out) = 0;
-  virtual ~CodeOutput() {}
-};
-
 // Writes ELF file.
-// The main complication is that the sections often want to reference
-// each other.  We solve this by writing the ELF file in two stages:
-//  * Sections are asked about their size, and overall layout is calculated.
-//  * Sections do the actual writes which may use offsets of other sections.
+//
+// The basic layout of the elf file:
+//   Elf_Ehdr                    - The ELF header.
+//   Elf_Phdr[]                  - Program headers for the linker.
+//   .rodata                     - DEX files and oat metadata.
+//   .text                       - Compiled code.
+//   .bss                        - Zero-initialized writeable section.
+//   .dynstr                     - Names for .dynsym.
+//   .dynsym                     - A few oat-specific dynamic symbols.
+//   .hash                       - Hash-table for .dynsym.
+//   .dynamic                    - Tags which let the linker locate .dynsym.
+//   .strtab                     - Names for .symtab.
+//   .symtab                     - Debug symbols.
+//   .eh_frame                   - Unwind information (CFI).
+//   .eh_frame_hdr               - Index of .eh_frame.
+//   .debug_frame                - Unwind information (CFI).
+//   .debug_frame.oat_patches    - Addresses for relocation.
+//   .debug_info                 - Debug information.
+//   .debug_info.oat_patches     - Addresses for relocation.
+//   .debug_abbrev               - Decoding information for .debug_info.
+//   .debug_str                  - Strings for .debug_info.
+//   .debug_line                 - Line number tables.
+//   .debug_line.oat_patches     - Addresses for relocation.
+//   .text.oat_patches           - Addresses for relocation.
+//   .shstrtab                   - Names of ELF sections.
+//   Elf_Shdr[]                  - Section headers.
+//
+// Some sections are optional (the debug sections in particular).
+//
+// We try to write the section data directly into the file without much
+// in-memory buffering.  This means we generally write sections in
+// dependency order (e.g. .dynamic points to .dynsym which points to .text).
+//
+// In the cases where we need to buffer, we write the larger section first
+// and buffer the smaller one (e.g. .strtab is bigger than .symtab).
+//
+// The debug sections are written last for easier stripping.
+//
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
+  static constexpr size_t kMaxProgramHeaders = 16;
   using Elf_Addr = typename ElfTypes::Addr;
   using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
@@ -53,776 +85,443 @@
   using Elf_Dyn = typename ElfTypes::Dyn;
 
   // Base class of all sections.
-  class Section {
+  class Section : public OutputStream {
    public:
-    Section(const std::string& name, Elf_Word type, Elf_Word flags,
-            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
-        : header_(), section_index_(0), name_(name), link_(link) {
+    Section(ElfBuilder<ElfTypes>* owner, const std::string& name,
+            Elf_Word type, Elf_Word flags, const Section* link,
+            Elf_Word info, Elf_Word align, Elf_Word entsize)
+        : OutputStream(name), owner_(owner), header_(),
+          section_index_(0), name_(name), link_(link),
+          started_(false), finished_(false), phdr_flags_(PF_R), phdr_type_(0) {
+      DCHECK_GE(align, 1u);
       header_.sh_type = type;
       header_.sh_flags = flags;
       header_.sh_info = info;
       header_.sh_addralign = align;
       header_.sh_entsize = entsize;
     }
-    virtual ~Section() {}
 
-    // Returns the size of the content of this section.  It is used to
-    // calculate file offsets of all sections before doing any writes.
-    virtual Elf_Word GetSize() const = 0;
-
-    // Write the content of this section to the given file.
-    // This must write exactly the number of bytes returned by GetSize().
-    // Offsets of all sections are known when this method is called.
-    virtual bool Write(File* elf_file) = 0;
-
-    Elf_Word GetLink() const {
-      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
+    ~Section() OVERRIDE {
+      if (started_) {
+        CHECK(finished_);
+      }
     }
 
-    const Elf_Shdr* GetHeader() const {
-      return &header_;
+    // Start writing of this section.
+    void Start() {
+      CHECK(!started_);
+      CHECK(!finished_);
+      started_ = true;
+      auto& sections = owner_->sections_;
+      // Check that the previous section is complete.
+      CHECK(sections.empty() || sections.back()->finished_);
+      // The first ELF section index is 1. Index 0 is reserved for NULL.
+      section_index_ = sections.size() + 1;
+      // Push this section on the list of written sections.
+      sections.push_back(this);
+      // Align file position.
+      if (header_.sh_type != SHT_NOBITS) {
+        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->Seek(header_.sh_offset, kSeekSet);
+      }
+      // Align virtual memory address.
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        header_.sh_addr = RoundUp(owner_->virtual_address_, header_.sh_addralign);
+        owner_->virtual_address_ = header_.sh_addr;
+      }
     }
 
-    Elf_Shdr* GetHeader() {
-      return &header_;
+    // Finish writing of this section.
+    void End() {
+      CHECK(started_);
+      CHECK(!finished_);
+      finished_ = true;
+      if (header_.sh_type == SHT_NOBITS) {
+        CHECK_GT(header_.sh_size, 0u);
+      } else {
+        // Use the current file position to determine section size.
+        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        CHECK_GE(file_offset, (off_t)header_.sh_offset);
+        header_.sh_size = file_offset - header_.sh_offset;
+      }
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        owner_->virtual_address_ += header_.sh_size;
+      }
+    }
+
+    // Get the location of this section in virtual memory.
+    Elf_Addr GetAddress() const {
+      CHECK(started_);
+      return header_.sh_addr;
+    }
+
+    // Returns the size of the content of this section.
+    Elf_Word GetSize() const {
+      if (finished_) {
+        return header_.sh_size;
+      } else {
+        CHECK(started_);
+        CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
+        return owner_->Seek(0, kSeekCurrent) - header_.sh_offset;
+      }
+    }
+
+    // Set desired allocation size for .bss section.
+    void SetSize(Elf_Word size) {
+      CHECK_EQ(header_.sh_type, (Elf_Word)SHT_NOBITS);
+      header_.sh_size = size;
+    }
+
+    // This function always succeeds to simplify code.
+    // Use builder's Good() to check the actual status.
+    bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+      CHECK(started_);
+      CHECK(!finished_);
+      owner_->WriteFully(buffer, byte_count);
+      return true;
+    }
+
+    // This function always succeeds to simplify code.
+    // Use builder's Good() to check the actual status.
+    off_t Seek(off_t offset, Whence whence) OVERRIDE {
+      // Forward the seek as-is and trust the caller to use it reasonably.
+      return owner_->Seek(offset, whence);
+    }
+
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->Flush();
     }
 
     Elf_Word GetSectionIndex() const {
+      DCHECK(started_);
       DCHECK_NE(section_index_, 0u);
       return section_index_;
     }
 
-    void SetSectionIndex(Elf_Word section_index) {
-      section_index_ = section_index;
-    }
-
-    const std::string& GetName() const {
-      return name_;
-    }
-
    private:
+    ElfBuilder<ElfTypes>* owner_;
     Elf_Shdr header_;
     Elf_Word section_index_;
     const std::string name_;
     const Section* const link_;
+    bool started_;
+    bool finished_;
+    Elf_Word phdr_flags_;
+    Elf_Word phdr_type_;
+
+    friend class ElfBuilder;
 
     DISALLOW_COPY_AND_ASSIGN(Section);
   };
 
-  // Writer of .dynamic section.
-  class DynamicSection FINAL : public Section {
-   public:
-    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
-      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
-      dynamics_.push_back({tag, value, section});
-    }
-
-    DynamicSection(const std::string& name, Section* link)
-        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
-                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Dyn> buffer;
-      buffer.reserve(dynamics_.size() + 1u);
-      for (const ElfDynamicState& it : dynamics_) {
-        if (it.section_ != nullptr) {
-          // We are adding an address relative to a section.
-          buffer.push_back(
-              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
-        } else {
-          buffer.push_back({it.tag_, {it.value_}});
-        }
-      }
-      buffer.push_back({DT_NULL, {0}});
-      return WriteArray(elf_file, buffer.data(), buffer.size());
-    }
-
-   private:
-    struct ElfDynamicState {
-      Elf_Sword tag_;
-      Elf_Word value_;
-      const Section* section_;
-    };
-    std::vector<ElfDynamicState> dynamics_;
-  };
-
-  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
-                           Elf_Addr buffer_address,
-                           Elf_Addr base_address,
-                           std::vector<uint8_t>* buffer);
-
-  // Section with content based on simple memory buffer.
-  // The buffer can be optionally patched before writing.
-  class RawSection FINAL : public Section {
-   public:
-    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
-               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
-        : Section(name, type, flags, link, info, align, entsize),
-          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
-    }
-
-    RawSection(const std::string& name, Elf_Word type)
-        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      if (!patch_locations_.empty()) {
-        DCHECK(!patched_);  // Do not patch twice.
-        DCHECK(patch_ != nullptr);
-        DCHECK(patch_base_section_ != nullptr);
-        patch_(patch_locations_,
-               this->GetHeader()->sh_addr,
-               patch_base_section_->GetHeader()->sh_addr,
-               &buffer_);
-        patched_ = true;
-      }
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
-    bool IsEmpty() const {
-      return buffer_.size() == 0;
-    }
-
-    std::vector<uint8_t>* GetBuffer() {
-      return &buffer_;
-    }
-
-    void SetBuffer(const std::vector<uint8_t>& buffer) {
-      buffer_ = buffer;
-    }
-
-    std::vector<uintptr_t>* GetPatchLocations() {
-      return &patch_locations_;
-    }
-
-   private:
-    std::vector<uint8_t> buffer_;
-    std::vector<uintptr_t> patch_locations_;
-    bool patched_;
-    // User-provided function to do the actual patching.
-    PatchFn patch_;
-    // The section that we patch against (usually .text).
-    const Section* patch_base_section_;
-  };
-
-  // Writer of .rodata section or .text section.
-  // The write is done lazily using the provided CodeOutput.
-  class OatSection FINAL : public Section {
-   public:
-    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align,
-               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
-        : Section(name, type, flags, link, info, align, entsize),
-          size_(size), code_output_(code_output) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      // The BufferedOutputStream class contains the buffer as a field,
-      // therefore it is too big to allocate on the stack.
-      std::unique_ptr<BufferedOutputStream> output_stream(
-          new BufferedOutputStream(new FileOutputStream(elf_file)));
-      return code_output_->Write(output_stream.get());
-    }
-
-   private:
-    Elf_Word size_;
-    CodeOutput* code_output_;
-  };
-
-  // Writer of .bss section.
-  class NoBitsSection FINAL : public Section {
-   public:
-    NoBitsSection(const std::string& name, Elf_Word size)
-        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-          size_(size) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-      LOG(ERROR) << "This section should not be written to the ELF file";
-      return false;
-    }
-
-   private:
-    Elf_Word size_;
-  };
-
   // Writer of .dynstr .strtab and .shstrtab sections.
-  class StrtabSection FINAL : public Section {
+  class StringSection FINAL : public Section {
    public:
-    StrtabSection(const std::string& name, Elf_Word flags)
-        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
-      buffer_.reserve(4 * KB);
-      // The first entry of strtab must be empty string.
-      buffer_ += '\0';
+    StringSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word flags, Elf_Word align)
+        : Section(owner, name, SHT_STRTAB, flags, nullptr, 0, align, 0),
+          current_offset_(0) {
     }
 
-    Elf_Word AddName(const std::string& name) {
-      Elf_Word offset = buffer_.size();
-      buffer_ += name;
-      buffer_ += '\0';
+    Elf_Word Write(const std::string& name) {
+      if (current_offset_ == 0) {
+        DCHECK(name.empty());
+      }
+      Elf_Word offset = current_offset_;
+      this->WriteFully(name.c_str(), name.length() + 1);
+      current_offset_ += name.length() + 1;
       return offset;
     }
 
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
    private:
-    std::string buffer_;
+    Elf_Word current_offset_;
   };
 
-  class HashSection;
-
   // Writer of .dynsym and .symtab sections.
-  class SymtabSection FINAL : public Section {
+  class SymbolSection FINAL : public Section {
    public:
-    // Add a symbol with given name to this symtab. The symbol refers to
-    // 'relative_addr' within the given section and has the given attributes.
-    void AddSymbol(const std::string& name, const Section* section,
-                   Elf_Addr addr, bool is_relative, Elf_Word size,
-                   uint8_t binding, uint8_t type, uint8_t other = 0) {
+    SymbolSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word type, Elf_Word flags, StringSection* strtab)
+        : Section(owner, name, type, flags, strtab, 0,
+                  sizeof(Elf_Off), sizeof(Elf_Sym)) {
+    }
+
+    // Buffer a symbol for this section.  It will be written later.
+    void Add(Elf_Word name, const Section* section,
+             Elf_Addr addr, bool is_relative, Elf_Word size,
+             uint8_t binding, uint8_t type, uint8_t other = 0) {
       CHECK(section != nullptr);
-      Elf_Word name_idx = strtab_->AddName(name);
-      symbols_.push_back({ name, section, addr, size, is_relative,
-                           MakeStInfo(binding, type), other, name_idx });
+      Elf_Sym sym = Elf_Sym();
+      sym.st_name = name;
+      sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
+      sym.st_size = size;
+      sym.st_other = other;
+      sym.st_shndx = section->GetSectionIndex();
+      sym.st_info = (binding << 4) + (type & 0xf);
+      symbols_.push_back(sym);
     }
 
-    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
-                  StrtabSection* strtab)
-        : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
-          strtab_(strtab) {
-    }
-
-    bool IsEmpty() const {
-      return symbols_.empty();
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Sym> buffer;
-      buffer.reserve(1u + symbols_.size());
-      buffer.push_back(Elf_Sym());  // NULL.
-      for (const ElfSymbolState& it : symbols_) {
-        Elf_Sym sym = Elf_Sym();
-        sym.st_name = it.name_idx_;
-        if (it.is_relative_) {
-          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
-        } else {
-          sym.st_value = it.addr_;
-        }
-        sym.st_size = it.size_;
-        sym.st_other = it.other_;
-        sym.st_shndx = it.section_->GetSectionIndex();
-        sym.st_info = it.info_;
-        buffer.push_back(sym);
-      }
-      return WriteArray(elf_file, buffer.data(), buffer.size());
+    void Write() {
+      // The symbol table always has to start with the NULL symbol.
+      Elf_Sym null_symbol = Elf_Sym();
+      this->WriteFully(&null_symbol, sizeof(null_symbol));
+      this->WriteFully(symbols_.data(), symbols_.size() * sizeof(symbols_[0]));
+      symbols_.clear();
+      symbols_.shrink_to_fit();
     }
 
    private:
-    struct ElfSymbolState {
-      const std::string name_;
-      const Section* section_;
-      Elf_Addr addr_;
-      Elf_Word size_;
-      bool is_relative_;
-      uint8_t info_;
-      uint8_t other_;
-      Elf_Word name_idx_;  // index in the strtab.
-    };
-
-    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-      return ((binding) << 4) + ((type) & 0xf);
-    }
-
-    // The symbols in the same order they will be in the symbol table.
-    std::vector<ElfSymbolState> symbols_;
-    StrtabSection* strtab_;
-
-    friend class HashSection;
+    std::vector<Elf_Sym> symbols_;
   };
 
-  // TODO: Consider removing.
-  // We use it only for the dynsym section which has only 5 symbols.
-  // We do not use it for symtab, and we probably do not have to
-  // since we use those symbols only to print backtraces.
-  class HashSection FINAL : public Section {
-   public:
-    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
-        : Section(name, SHT_HASH, flags, symtab,
-                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
-          symtab_(symtab) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      Elf_Word nbuckets = GetNumBuckets();
-      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
-      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
-    }
-
-    bool Write(File* const elf_file) OVERRIDE {
-      // Here is how The ELF hash table works.
-      // There are 3 arrays to worry about.
-      // * The symbol table where the symbol information is.
-      // * The bucket array which is an array of indexes into the symtab and chain.
-      // * The chain array which is also an array of indexes into the symtab and chain.
-      //
-      // Let's say the state is something like this.
-      // +--------+       +--------+      +-----------+
-      // | symtab |       | bucket |      |   chain   |
-      // |  null  |       | 1      |      | STN_UNDEF |
-      // | <sym1> |       | 4      |      | 2         |
-      // | <sym2> |       |        |      | 5         |
-      // | <sym3> |       |        |      | STN_UNDEF |
-      // | <sym4> |       |        |      | 3         |
-      // | <sym5> |       |        |      | STN_UNDEF |
-      // +--------+       +--------+      +-----------+
-      //
-      // The lookup process (in Python pseudocode) is
-      //
-      // def GetSym(name):
-      //     # NB STN_UNDEF == 0
-      //     indx = bucket[elfhash(name) % num_buckets]
-      //     while indx != STN_UNDEF:
-      //         if GetSymbolName(symtab[indx]) == name:
-      //             return symtab[indx]
-      //         indx = chain[indx]
-      //     return SYMBOL_NOT_FOUND
-      //
-      // Between bucket and chain arrays every symtab index must be present exactly
-      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
-      const auto& symbols = symtab_->symbols_;
-      // Select number of buckets.
-      // This is essentially arbitrary.
-      Elf_Word nbuckets = GetNumBuckets();
-      // 1 is for the implicit NULL symbol.
-      Elf_Word chain_size = (symbols.size() + 1);
-      std::vector<Elf_Word> hash;
-      hash.push_back(nbuckets);
-      hash.push_back(chain_size);
-      uint32_t bucket_offset = hash.size();
-      uint32_t chain_offset = bucket_offset + nbuckets;
-      hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-      Elf_Word* buckets = hash.data() + bucket_offset;
-      Elf_Word* chain   = hash.data() + chain_offset;
-
-      // Set up the actual hash table.
-      for (Elf_Word i = 0; i < symbols.size(); i++) {
-        // Add 1 since we need to have the null symbol that is not in the symbols
-        // list.
-        Elf_Word index = i + 1;
-        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
-        if (buckets[hash_val] == 0) {
-          buckets[hash_val] = index;
-        } else {
-          hash_val = buckets[hash_val];
-          CHECK_LT(hash_val, chain_size);
-          while (chain[hash_val] != 0) {
-            hash_val = chain[hash_val];
-            CHECK_LT(hash_val, chain_size);
-          }
-          chain[hash_val] = index;
-          // Check for loops. Works because if this is non-empty then there must be
-          // another cell which already contains the same symbol index as this one,
-          // which means some symbol has more than one name, which isn't allowed.
-          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-        }
-      }
-      return WriteArray(elf_file, hash.data(), hash.size());
-    }
-
-   private:
-    Elf_Word GetNumBuckets() const {
-      const auto& symbols = symtab_->symbols_;
-      if (symbols.size() < 8) {
-        return 2;
-      } else if (symbols.size() < 32) {
-        return 4;
-      } else if (symbols.size() < 256) {
-        return 16;
-      } else {
-        // Have about 32 ids per bucket.
-        return RoundUp(symbols.size()/32, 2);
-      }
-    }
-
-    // from bionic
-    static inline unsigned elfhash(const char *_name) {
-      const unsigned char *name = (const unsigned char *) _name;
-      unsigned h = 0, g;
-
-      while (*name) {
-        h = (h << 4) + *name++;
-        g = h & 0xf0000000;
-        h ^= g;
-        h ^= g >> 24;
-      }
-      return h;
-    }
-
-    SymtabSection* symtab_;
-
-    DISALLOW_COPY_AND_ASSIGN(HashSection);
-  };
-
-  ElfBuilder(InstructionSet isa,
-             Elf_Word rodata_size, CodeOutput* rodata_writer,
-             Elf_Word text_size, CodeOutput* text_writer,
-             Elf_Word bss_size)
+  ElfBuilder(InstructionSet isa, OutputStream* output)
     : isa_(isa),
-      dynstr_(".dynstr", SHF_ALLOC),
-      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(".hash", SHF_ALLOC, &dynsym_),
-      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
-              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
-      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
-            nullptr, 0, kPageSize, 0, text_size, text_writer),
-      bss_(".bss", bss_size),
-      dynamic_(".dynamic", &dynstr_),
-      strtab_(".strtab", 0),
-      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
-      shstrtab_(".shstrtab", 0) {
+      output_(output),
+      output_good_(true),
+      output_offset_(0),
+      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+      strtab_(this, ".strtab", 0, kPageSize),
+      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+      debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+      debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+      shstrtab_(this, ".shstrtab", 0, 1),
+      virtual_address_(0) {
+    text_.phdr_flags_ = PF_R | PF_X;
+    bss_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_type_ = PT_DYNAMIC;
+    eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
   }
   ~ElfBuilder() {}
 
-  OatSection* GetText() { return &text_; }
-  SymtabSection* GetSymtab() { return &symtab_; }
+  InstructionSet GetIsa() { return isa_; }
+  Section* GetRoData() { return &rodata_; }
+  Section* GetText() { return &text_; }
+  Section* GetBss() { return &bss_; }
+  StringSection* GetStrTab() { return &strtab_; }
+  SymbolSection* GetSymTab() { return &symtab_; }
+  Section* GetEhFrame() { return &eh_frame_; }
+  Section* GetEhFrameHdr() { return &eh_frame_hdr_; }
+  Section* GetDebugFrame() { return &debug_frame_; }
+  Section* GetDebugInfo() { return &debug_info_; }
+  Section* GetDebugLine() { return &debug_line_; }
 
-  bool Write(File* elf_file) {
-    // Since the .text section of an oat file contains relative references to .rodata
-    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
-    // a non-traditional layout where the .bss section is mapped independently of the
-    // .dynamic section and needs its own program header with LOAD RW.
-    //
-    // The basic layout of the elf file. Order may be different in final output.
-    // +-------------------------+
-    // | Elf_Ehdr                |
-    // +-------------------------+
-    // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
-    // | Elf_Phdr LOAD R X       | .text
-    // | Elf_Phdr LOAD RW        | .bss (Optional)
-    // | Elf_Phdr LOAD RW        | .dynamic
-    // | Elf_Phdr DYNAMIC        | .dynamic
-    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
-    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
-    // +-------------------------+
-    // | .dynsym                 |
-    // | Elf_Sym  STN_UNDEF      |
-    // | Elf_Sym  oatdata        |
-    // | Elf_Sym  oatexec        |
-    // | Elf_Sym  oatlastword    |
-    // | Elf_Sym  oatbss         | (Optional)
-    // | Elf_Sym  oatbsslastword | (Optional)
-    // +-------------------------+
-    // | .dynstr                 |
-    // | names for .dynsym       |
-    // +-------------------------+
-    // | .hash                   |
-    // | hashtable for dynsym    |
-    // +-------------------------+
-    // | .rodata                 |
-    // | oatdata..oatexec-4      |
-    // +-------------------------+
-    // | .text                   |
-    // | oatexec..oatlastword    |
-    // +-------------------------+
-    // | .dynamic                |
-    // | Elf_Dyn DT_HASH         |
-    // | Elf_Dyn DT_STRTAB       |
-    // | Elf_Dyn DT_SYMTAB       |
-    // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRSZ        |
-    // | Elf_Dyn DT_SONAME       |
-    // | Elf_Dyn DT_NULL         |
-    // +-------------------------+  (Optional)
-    // | .symtab                 |  (Optional)
-    // | program symbols         |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .strtab                 |  (Optional)
-    // | names for .symtab       |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame               |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame_hdr           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_info             |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_abbrev           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_str              |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_line             |  (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | names of sections       |
-    // +-------------------------+
-    // | Elf_Shdr null           |
-    // | Elf_Shdr .dynsym        |
-    // | Elf_Shdr .dynstr        |
-    // | Elf_Shdr .hash          |
-    // | Elf_Shdr .rodata        |
-    // | Elf_Shdr .text          |
-    // | Elf_Shdr .bss           |  (Optional)
-    // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .symtab        |  (Optional)
-    // | Elf_Shdr .strtab        |  (Optional)
-    // | Elf_Shdr .eh_frame      |  (Optional)
-    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
-    // | Elf_Shdr .debug_info    |  (Optional)
-    // | Elf_Shdr .debug_abbrev  |  (Optional)
-    // | Elf_Shdr .debug_str     |  (Optional)
-    // | Elf_Shdr .debug_line    |  (Optional)
-    // | Elf_Shdr .oat_patches   |  (Optional)
-    // | Elf_Shdr .shstrtab      |
-    // +-------------------------+
-    constexpr bool debug_logging_ = false;
+  // Encode patch locations as a LEB128 list of deltas between consecutive addresses.
+  // (Exposed publicly for tests.)
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer) {
+    buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
+    uintptr_t address = 0;  // relative to start of section.
+    for (uintptr_t location : locations) {
+      DCHECK_GE(location, address) << "Patch locations are not in sorted order";
+      EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
+      address = location;
+    }
+  }
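For intuition, locations {16, 20, 29} encode as deltas {16, 4, 9}, one ULEB128 byte each. A hypothetical decoder, the inverse of the loop above (DecodeUnsignedLeb128 comes from the leb128.h header this patch includes):

    std::vector<uintptr_t> locations;
    const uint8_t* ptr = buffer->data();
    const uint8_t* end = ptr + buffer->size();
    uintptr_t address = 0;  // running offset within the section.
    while (ptr < end) {
      address += DecodeUnsignedLeb128(&ptr);  // add the next delta.
      locations.push_back(address);
    }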
 
-    // Create a list of all section which we want to write.
-    // This is the order in which they will be written.
-    std::vector<Section*> sections;
-    sections.push_back(&dynsym_);
-    sections.push_back(&dynstr_);
-    sections.push_back(&hash_);
-    sections.push_back(&rodata_);
-    sections.push_back(&text_);
-    if (bss_.GetSize() != 0u) {
-      sections.push_back(&bss_);
-    }
-    sections.push_back(&dynamic_);
-    if (!symtab_.IsEmpty()) {
-      sections.push_back(&symtab_);
-      sections.push_back(&strtab_);
-    }
-    for (Section* section : other_sections_) {
-      sections.push_back(section);
-    }
-    sections.push_back(&shstrtab_);
-    for (size_t i = 0; i < sections.size(); i++) {
-      // The first section index is 1.  Index 0 is reserved for NULL.
-      // Section index is used for relative symbols and for section links.
-      sections[i]->SetSectionIndex(i + 1);
-      // Add section name to .shstrtab.
-      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
-      sections[i]->GetHeader()->sh_name = name_offset;
-    }
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
+    std::vector<uint8_t> buffer;
+    EncodeOatPatches(patch_locations, &buffer);
+    std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer.data(), buffer.size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
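A hypothetical call site, following the '.oat_patches' suffix convention listed in the layout comment at the top of this file (patch_locations is an illustrative std::vector<uintptr_t>):

    builder->WritePatches(".text.oat_patches",
                          ArrayRef<const uintptr_t>(patch_locations));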
 
-    // The running program does not have access to section headers
-    // and the loader is not supposed to use them either.
-    // The .dynamic section therefore replicates some of the layout
-    // information like the address and size of .rodata and .text.
-    // It also contains other metadata like the SONAME.
-    // The .dynamic section is found using the PT_DYNAMIC program header.
-    BuildDynsymSection();
-    BuildDynamicSection(elf_file->GetPath());
+  void WriteSection(const char* name, const std::vector<uint8_t>* buffer) {
+    std::unique_ptr<Section> s(new Section(this, name, SHT_PROGBITS, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer->data(), buffer->size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
 
-    // We do not know the number of headers until the final stages of write.
-    // It is easiest to just reserve a fixed amount of space for them.
-    constexpr size_t kMaxProgramHeaders = 8;
-    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
+  void Start() {
+    // Reserve space for ELF header and program headers.
+    // We do not know the number of headers until later, so
+    // it is easiest to just reserve a fixed amount of space.
+    int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+    Seek(size, kSeekSet);
+    virtual_address_ += size;
+  }
 
-    // Layout of all sections - determine the final file offsets and addresses.
-    // This must be done after we have built all sections and know their size.
-    Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Elf_Addr load_address = file_offset;
-    std::vector<Elf_Shdr> section_headers;
-    section_headers.reserve(1u + sections.size());
-    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
-    for (auto* section : sections) {
-      Elf_Shdr* header = section->GetHeader();
-      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
-      header->sh_size = section->GetSize();
-      header->sh_link = section->GetLink();
-      // Allocate memory for the section in the file.
-      if (header->sh_type != SHT_NOBITS) {
-        header->sh_offset = RoundUp(file_offset, alignment);
-        file_offset = header->sh_offset + header->sh_size;
-      }
-      // Allocate memory for the section during program execution.
-      if ((header->sh_flags & SHF_ALLOC) != 0) {
-        header->sh_addr = RoundUp(load_address, alignment);
-        load_address = header->sh_addr + header->sh_size;
-      }
-      if (debug_logging_) {
-        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
-                  << " offset=0x" << header->sh_offset
-                  << " addr=0x" << header->sh_addr
-                  << " size=0x" << header->sh_size;
-      }
-      // Collect section headers into continuous array for convenience.
-      section_headers.push_back(*header);
-    }
-    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
-
-    // Create program headers now that we know the layout of the whole file.
-    // Each segment contains one or more sections which are mapped together.
-    // Not all sections are mapped during the execution of the program.
-    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
-    // interesting parts of memory and their addresses overlap with PT_LOAD.
-    std::vector<Elf_Phdr> program_headers;
-    program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
-    // Create the main LOAD R segment which spans all sections up to .rodata.
-    const Elf_Shdr* rodata = rodata_.GetHeader();
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
-      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
-    if (bss_.GetHeader()->sh_size != 0u) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
-    }
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
-    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
-    const Section* eh_frame = FindSection(".eh_frame");
-    if (eh_frame != nullptr) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
-      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
-      if (eh_frame_hdr != nullptr) {
-        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
-        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
-        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
-                 eh_frame_hdr->GetHeader()->sh_offset);
-        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
-        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
+  void End() {
+    // Write section names and finish the section headers.
+    shstrtab_.Start();
+    shstrtab_.Write("");
+    for (auto* section : sections_) {
+      section->header_.sh_name = shstrtab_.Write(section->name_);
+      if (section->link_ != nullptr) {
+        section->header_.sh_link = section->link_->GetSectionIndex();
       }
     }
-    DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
-    program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
-      kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
-    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
+    shstrtab_.End();
 
-    // Create the main ELF header.
+    // Write section headers at the end of the ELF file.
+    std::vector<Elf_Shdr> shdrs;
+    shdrs.reserve(1u + sections_.size());
+    shdrs.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections_) {
+      shdrs.push_back(section->header_);
+    }
+    Elf_Off section_headers_offset;
+    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    Seek(section_headers_offset, kSeekSet);
+    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Write the initial file headers.
+    std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
     Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = kProgramHeadersOffset;
+    elf_header.e_phoff = sizeof(Elf_Ehdr);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = program_headers.size();
-    elf_header.e_shnum = section_headers.size();
+    elf_header.e_phnum = phdrs.size();
+    elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-
-    // Write all headers and section content to the file.
-    // Depending on the implementations of Section::Write, this
-    // might be just memory copies or some more elaborate operations.
-    if (!WriteArray(elf_file, &elf_header, 1)) {
-      LOG(INFO) << "Failed to write the ELF header";
-      return false;
-    }
-    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
-      LOG(INFO) << "Failed to write the program headers";
-      return false;
-    }
-    for (Section* section : sections) {
-      const Elf_Shdr* header = section->GetHeader();
-      if (header->sh_type != SHT_NOBITS) {
-        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
-          LOG(INFO) << "Failed to write section " << section->GetName();
-          return false;
-        }
-        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
-        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
-          << "The number of bytes written does not match GetSize()";
-      }
-    }
-    if (!SeekTo(elf_file, section_headers_offset) ||
-        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
-      LOG(INFO) << "Failed to write the section headers";
-      return false;
-    }
-    return true;
+    Seek(0, kSeekSet);
+    WriteFully(&elf_header, sizeof(elf_header));
+    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    Flush();
   }
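Putting the pieces together, a minimal sketch of the streaming protocol the builder now expects; the data and 'file' names are illustrative, and ElfTypes32 is assumed from elf_utils.h:

    FileOutputStream out(file);                // 'file' is an open File*.
    ElfBuilder<ElfTypes32> builder(kArm, &out);
    builder.Start();                           // reserve Ehdr + Phdr space.
    builder.GetRoData()->Start();
    builder.GetRoData()->WriteFully(rodata.data(), rodata.size());
    builder.GetRoData()->End();
    builder.GetText()->Start();
    builder.GetText()->WriteFully(text.data(), text.size());
    builder.GetText()->End();
    builder.WriteDynamicSection(file->GetPath());
    builder.End();                             // Shdrs, then seek back for Ehdr/Phdrs.
    CHECK(builder.Good());                     // no write or seek failed.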
 
-  // Adds the given section to the builder.  It does not take ownership.
-  void RegisterSection(Section* section) {
-    other_sections_.push_back(section);
+  // The running program does not have access to section headers
+  // and the loader is not supposed to use them either.
+  // The .dynamic section therefore replicates some of the layout
+  // information like the address and size of .rodata and .text.
+  // It also contains other metadata like the SONAME.
+  // The .dynamic section is found using the PT_DYNAMIC program header.
+  void WriteDynamicSection(const std::string& elf_file_path) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
+    }
+
+    dynstr_.Start();
+    dynstr_.Write("");  // dynstr should start with empty string.
+    dynsym_.Add(dynstr_.Write("oatdata"), &rodata_, 0, true,
+                rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    if (text_.GetSize() != 0u) {
+      dynsym_.Add(dynstr_.Write("oatexec"), &text_, 0, true,
+                  text_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatlastword"), &text_, text_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    } else if (rodata_.GetSize() != 0) {
+      // rodata_ can be size 0 for dwarf_test.
+      dynsym_.Add(dynstr_.Write("oatlastword"), &rodata_, rodata_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    if (bss_.finished_) {
+      dynsym_.Add(dynstr_.Write("oatbss"), &bss_,
+                  0, true, bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatbsslastword"), &bss_,
+                  bss_.GetSize() - 4, true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    Elf_Word soname_offset = dynstr_.Write(soname);
+    dynstr_.End();
+
+    dynsym_.Start();
+    dynsym_.Write();
+    dynsym_.End();
+
+    // We do not really need a hash table since there are so few entries.
+    // However, the hash table is the only way the linker can actually
+    // determine the number of symbols in .dynsym, so it is required.
+    hash_.Start();
+    int count = dynsym_.GetSize() / sizeof(Elf_Sym);  // Includes NULL.
+    std::vector<Elf_Word> hash;
+    hash.push_back(1);  // Number of buckets.
+    hash.push_back(count);  // Number of chains.
+    // Buckets.  Having just one makes lookup a linear search.
+    hash.push_back(1);  // Point to first non-NULL symbol.
+    // Chains.  This creates a linked list of symbols.
+    hash.push_back(0);  // Dummy entry for the NULL symbol.
+    for (int i = 1; i < count - 1; i++) {
+      hash.push_back(i + 1);  // Each symbol points to the next one.
+    }
+    hash.push_back(0);  // Last symbol terminates the chain.
+    hash_.WriteFully(hash.data(), hash.size() * sizeof(hash[0]));
+    hash_.End();
+
+    dynamic_.Start();
+    Elf_Dyn dyns[] = {
+      { DT_HASH, { hash_.GetAddress() } },
+      { DT_STRTAB, { dynstr_.GetAddress() } },
+      { DT_SYMTAB, { dynsym_.GetAddress() } },
+      { DT_SYMENT, { sizeof(Elf_Sym) } },
+      { DT_STRSZ, { dynstr_.GetSize() } },
+      { DT_SONAME, { soname_offset } },
+      { DT_NULL, { 0 } },
+    };
+    dynamic_.WriteFully(&dyns, sizeof(dyns));
+    dynamic_.End();
   }
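A worked example of the hash table built above, assuming .dynsym holds the NULL symbol plus oatdata, oatexec and oatlastword (so count == 4):

    // hash = { 1,     // nbuckets: a single bucket.
    //          4,     // nchains: symbol count, including NULL.
    //          1,     // bucket[0] -> index of the first real symbol.
    //          0,     // chain[0]: the NULL symbol, never reached.
    //          2, 3,  // chain[1] -> 2, chain[2] -> 3: walk the list.
    //          0 };   // chain[3]: STN_UNDEF terminates the chain.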
 
-  const Section* FindSection(const char* name) {
-    for (const auto* section : other_sections_) {
-      if (section->GetName() == name) {
-        return section;
-      }
-    }
-    return nullptr;
+  // Returns true if all writes and seeks on the output stream succeeded.
+  bool Good() {
+    return output_good_;
   }
 
  private:
-  static bool SeekTo(File* elf_file, Elf_Word offset) {
-    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
-      << "Seeking backwards";
-    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
-      return false;
-    }
-    return true;
-  }
-
-  template<typename T>
-  static bool WriteArray(File* elf_file, const T* data, size_t count) {
-    if (count != 0) {
-      DCHECK(data != nullptr);
-      if (!elf_file->WriteFully(data, count * sizeof(T))) {
-        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
-        return false;
+  // This function always "succeeds" in order to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  void WriteFully(const void* buffer, size_t byte_count) {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to ELF file at offset " << output_offset_;
+        output_good_ = false;
       }
     }
-    return true;
+    output_offset_ += byte_count;
   }
 
-  // Helper - create segment header based on memory range.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type    = type;
-    phdr.p_flags   = flags;
-    phdr.p_offset  = offset;
-    phdr.p_vaddr   = offset;
-    phdr.p_paddr   = offset;
-    phdr.p_filesz  = size;
-    phdr.p_memsz   = size;
-    phdr.p_align   = align;
-    return phdr;
+  // This function always "succeeds" in order to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) {
+    // We keep a shadow copy of the offset so that we can return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == (off_t)-1) {
+        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
   }
 
-  // Helper - create segment header based on section header.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    const Section& section) {
-    const Elf_Shdr* shdr = section.GetHeader();
-    // Only run-time allocated sections should be in segment headers.
-    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type   = type;
-    phdr.p_flags  = flags;
-    phdr.p_offset = shdr->sh_offset;
-    phdr.p_vaddr  = shdr->sh_addr;
-    phdr.p_paddr  = shdr->sh_addr;
-    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
-    phdr.p_memsz  = shdr->sh_size;
-    phdr.p_align  = shdr->sh_addralign;
-    return phdr;
+  bool Flush() {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
   }
 
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
@@ -869,6 +568,10 @@
       }
       case kNone: {
         LOG(FATAL) << "No instruction set";
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unknown instruction set " << isa;
       }
     }
 
@@ -892,50 +595,112 @@
     return elf_header;
   }
 
-  void BuildDynamicSection(const std::string& elf_file_path) {
-    std::string soname(elf_file_path);
-    size_t directory_separator_pos = soname.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      soname = soname.substr(directory_separator_pos + 1);
+  // Create program headers based on written sections.
+  std::vector<Elf_Phdr> MakeProgramHeaders() {
+    CHECK(!sections_.empty());
+    std::vector<Elf_Phdr> phdrs;
+    {
+      // The program headers must start with PT_PHDR, which is used by
+      // the loaded process to determine the number of program headers.
+      Elf_Phdr phdr = Elf_Phdr();
+      phdr.p_type    = PT_PHDR;
+      phdr.p_flags   = PF_R;
+      phdr.p_offset  = phdr.p_vaddr = phdr.p_paddr = sizeof(Elf_Ehdr);
+      phdr.p_filesz  = phdr.p_memsz = 0;  // We need to fill this later.
+      phdr.p_align   = sizeof(Elf_Off);
+      phdrs.push_back(phdr);
+      // Tell the linker to mmap the start of the file to memory.
+      Elf_Phdr load = Elf_Phdr();
+      load.p_type    = PT_LOAD;
+      load.p_flags   = PF_R;
+      load.p_offset  = load.p_vaddr = load.p_paddr = 0;
+      load.p_filesz  = load.p_memsz = sections_[0]->header_.sh_offset;
+      load.p_align   = kPageSize;
+      phdrs.push_back(load);
     }
-    // NB: We must add the name before adding DT_STRSZ.
-    Elf_Word soname_offset = dynstr_.AddName(soname);
-
-    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
-    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
-    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
-    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
-    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
-    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
-  }
-
-  void BuildDynsymSection() {
-    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
-                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatexec", &text_, 0, true,
-                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
-                      true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_.GetSize() != 0u) {
-      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
-                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
-                        true, 4, STB_GLOBAL, STT_OBJECT);
+    // Create program headers for sections.
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // PT_LOAD tells the linker to mmap part of the file.
+        // The linker can only mmap page-aligned sections.
+        // A single PT_LOAD may contain several ELF sections.
+        Elf_Phdr& prev = phdrs.back();
+        Elf_Phdr load = Elf_Phdr();
+        load.p_type   = PT_LOAD;
+        load.p_flags  = section->phdr_flags_;
+        load.p_offset = shdr.sh_offset;
+        load.p_vaddr  = load.p_paddr = shdr.sh_addr;
+        load.p_filesz = (shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u);
+        load.p_memsz  = shdr.sh_size;
+        load.p_align  = shdr.sh_addralign;
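+        // Note: consecutive sections with identical flags (e.g. several
+        // read-only sections laid out back to back) can share a single
+        // PT_LOAD, as long as neither of them is .bss (SHT_NOBITS).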
+        if (prev.p_type == load.p_type &&
+            prev.p_flags == load.p_flags &&
+            prev.p_filesz == prev.p_memsz &&  // Do not merge .bss
+            load.p_filesz == load.p_memsz) {  // Do not merge .bss
+          // Merge this PT_LOAD with the previous one.
+          Elf_Word size = shdr.sh_offset + shdr.sh_size - prev.p_offset;
+          prev.p_filesz = size;
+          prev.p_memsz  = size;
+        } else {
+          // If we are adding a new load segment, it must be page-aligned.
+          CHECK_EQ(shdr.sh_addralign, (Elf_Word)kPageSize);
+          phdrs.push_back(load);
+        }
+      }
     }
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // Other PT_* types allow the program to locate interesting
+        // parts of memory at runtime. They must overlap with PT_LOAD.
+        if (section->phdr_type_ != 0) {
+          Elf_Phdr phdr = Elf_Phdr();
+          phdr.p_type   = section->phdr_type_;
+          phdr.p_flags  = section->phdr_flags_;
+          phdr.p_offset = shdr.sh_offset;
+          phdr.p_vaddr  = phdr.p_paddr = shdr.sh_addr;
+          phdr.p_filesz = phdr.p_memsz = shdr.sh_size;
+          phdr.p_align  = shdr.sh_addralign;
+          phdrs.push_back(phdr);
+        }
+      }
+    }
+    // Set the size of the initial PT_PHDR.
+    CHECK_EQ(phdrs[0].p_type, (Elf_Word)PT_PHDR);
+    phdrs[0].p_filesz = phdrs[0].p_memsz = phdrs.size() * sizeof(Elf_Phdr);
+
+    return phdrs;
   }
 
   InstructionSet isa_;
-  StrtabSection dynstr_;
-  SymtabSection dynsym_;
-  HashSection hash_;
-  OatSection rodata_;
-  OatSection text_;
-  NoBitsSection bss_;
-  DynamicSection dynamic_;
-  StrtabSection strtab_;
-  SymtabSection symtab_;
-  std::vector<Section*> other_sections_;
-  StrtabSection shstrtab_;
+
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+
+  Section rodata_;
+  Section text_;
+  Section bss_;
+  StringSection dynstr_;
+  SymbolSection dynsym_;
+  Section hash_;
+  Section dynamic_;
+  Section eh_frame_;
+  Section eh_frame_hdr_;
+  StringSection strtab_;
+  SymbolSection symtab_;
+  Section debug_frame_;
+  Section debug_info_;
+  Section debug_line_;
+  StringSection shstrtab_;
+  std::vector<std::unique_ptr<Section>> other_sections_;
+
+  // List of the sections used, in the order in which they were written.
+  std::vector<Section*> sections_;
+
+  // Used for allocation of virtual address space.
+  Elf_Addr virtual_address_;
 
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..357d5f6 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
 
 class ElfWriter {
  public:
@@ -46,21 +49,21 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void SetBssSize(size_t bss_size) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
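+  // Note: the methods above are expected to be called roughly in the
+  // order in which they are declared.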
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+ protected:
+  ElfWriter() = default;
 };
 
 }  // namespace art
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 3a9e312..9dc6565 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -17,23 +17,66 @@
 #include "elf_writer_debug.h"
 
 #include <unordered_set>
+#include <vector>
 
 #include "base/casts.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
+#include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
+#include "elf_builder.h"
 #include "oat_writer.h"
 #include "utils.h"
+#include "stack_map.h"
 
 namespace art {
 namespace dwarf {
 
-static void WriteDebugFrameCIE(InstructionSet isa,
-                               ExceptionHeaderValueApplication addr_type,
-                               CFIFormat format,
-                               std::vector<uint8_t>* eh_frame) {
+static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmCore(machine_reg);
+    case kArm64:
+      return Reg::Arm64Core(machine_reg);
+    case kX86:
+      return Reg::X86Core(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Core(machine_reg);
+    case kMips:
+      return Reg::MipsCore(machine_reg);
+    case kMips64:
+      return Reg::Mips64Core(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
+static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmFp(machine_reg);
+    case kArm64:
+      return Reg::Arm64Fp(machine_reg);
+    case kX86:
+      return Reg::X86Fp(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Fp(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
+static void WriteCIE(InstructionSet isa,
+                     CFIFormat format,
+                     std::vector<uint8_t>* buffer) {
   // Scratch registers should be marked as undefined.  This tells the
   // debugger that their values in the previous frame are not recoverable.
   bool is64bit = Is64BitInstructionSet(isa);
@@ -59,8 +102,7 @@
         }
       }
       auto return_reg = Reg::ArmCore(14);  // R14(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kArm64: {
@@ -83,8 +125,7 @@
         }
       }
       auto return_reg = Reg::Arm64Core(30);  // R30(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kMips:
@@ -100,8 +141,7 @@
         }
       }
       auto return_reg = Reg::MipsCore(31);  // R31(RA).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kX86: {
@@ -127,8 +167,7 @@
         }
       }
       auto return_reg = Reg::X86Core(8);  // R8(EIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kX86_64: {
@@ -154,8 +193,7 @@
         }
       }
       auto return_reg = Reg::X86_64Core(16);  // R16(RIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kNone:
@@ -165,36 +203,69 @@
   UNREACHABLE();
 }
 
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches) {
-  const auto& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
+template<typename ElfTypes>
+void WriteCFISection(ElfBuilder<ElfTypes>* builder,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
+                     CFIFormat format) {
+  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
+        format == dwarf::DW_EH_FRAME_FORMAT);
+  typedef typename ElfTypes::Addr Elf_Addr;
+
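+  // For .eh_frame we also build a binary search table of
+  // (code address, FDE address) pairs; for .debug_frame we instead
+  // record the patch locations of the absolute addresses.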
+  std::vector<uint32_t> binary_search_table;
+  std::vector<uintptr_t> patch_locations;
+  if (format == DW_EH_FRAME_FORMAT) {
+    binary_search_table.reserve(2 * method_infos.size());
+  } else {
+    patch_locations.reserve(method_infos.size());
+  }
 
   // Write .eh_frame/.debug_frame section.
-  std::map<uint32_t, size_t> address_to_fde_offset_map;
-  size_t cie_offset = debug_frame->size();
-  WriteDebugFrameCIE(isa, address_type, format, debug_frame);
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    if (!mi.deduped_) {  // Only one FDE per unique address.
-      ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
-      if (!opcodes.empty()) {
-        address_to_fde_offset_map.emplace(mi.low_pc_, debug_frame->size());
-        WriteDebugFrameFDE(Is64BitInstructionSet(isa), cie_offset,
-                           mi.low_pc_, mi.high_pc_ - mi.low_pc_,
-                           opcodes, format, debug_frame, debug_frame_patches);
+  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+                       ? builder->GetDebugFrame()
+                       : builder->GetEhFrame());
+  {
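+    // The section starts with a single CIE (the frame setup common to
+    // all methods), followed by one FDE per unique method address.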
+    cfi_section->Start();
+    const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr cfi_address = cfi_section->GetAddress();
+    const Elf_Addr cie_address = cfi_address;
+    Elf_Addr buffer_address = cfi_address;
+    std::vector<uint8_t> buffer;  // Small temporary buffer.
+    WriteCIE(builder->GetIsa(), format, &buffer);
+    cfi_section->WriteFully(buffer.data(), buffer.size());
+    buffer_address += buffer.size();
+    buffer.clear();
+    for (const MethodDebugInfo& mi : method_infos) {
+      if (!mi.deduped_) {  // Only one FDE per unique address.
+        ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
+        if (!opcodes.empty()) {
+          const Elf_Addr code_address = text_address + mi.low_pc_;
+          if (format == DW_EH_FRAME_FORMAT) {
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(code_address));
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(buffer_address));
+          }
+          WriteFDE(is64bit, cfi_address, cie_address,
+                   code_address, mi.high_pc_ - mi.low_pc_,
+                   opcodes, format, buffer_address, &buffer,
+                   &patch_locations);
+          cfi_section->WriteFully(buffer.data(), buffer.size());
+          buffer_address += buffer.size();
+          buffer.clear();
+        }
       }
     }
+    cfi_section->End();
   }
 
   if (format == DW_EH_FRAME_FORMAT) {
+    auto* header_section = builder->GetEhFrameHdr();
+    header_section->Start();
+    uint32_t header_address = dchecked_integral_cast<uint32_t>(header_section->GetAddress());
     // Write .eh_frame_hdr section.
-    Writer<> header(eh_frame_hdr);
+    std::vector<uint8_t> buffer;
+    Writer<> header(&buffer);
     header.PushUint8(1);  // Version.
     // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
     // so we have to use pcrel which means relative to the pointer's location.
@@ -204,114 +275,522 @@
     // Encoding of binary search table addresses - libunwind supports only this
     // specific combination, which means relative to the start of .eh_frame_hdr.
     header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);
-    // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section
-    const int32_t relative_eh_frame_begin = -static_cast<int32_t>(debug_frame->size());
-    header.PushInt32(relative_eh_frame_begin - 4U);
+    // .eh_frame pointer
+    header.PushInt32(cfi_section->GetAddress() - (header_address + 4u));
     // Binary search table size (number of entries).
-    header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size()));
+    header.PushUint32(dchecked_integral_cast<uint32_t>(binary_search_table.size()/2));
+    header_section->WriteFully(buffer.data(), buffer.size());
     // Binary search table.
-    for (const auto& address_to_fde_offset : address_to_fde_offset_map) {
-      u_int32_t code_address = address_to_fde_offset.first;
-      int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second);
-      eh_frame_hdr_patches->push_back(header.data()->size());
-      header.PushUint32(code_address);
-      // We know the exact layout (eh_frame is immediately before eh_frame_hdr)
-      // and the data is relative to the start of the eh_frame_hdr,
-      // so patching isn't necessary (in contrast to the code address above).
-      header.PushInt32(relative_eh_frame_begin + fde_address);
+    for (size_t i = 0; i < binary_search_table.size(); i++) {
+      // Make addresses section-relative since we know the header address now.
+      binary_search_table[i] -= header_address;
     }
+    header_section->WriteFully(binary_search_table.data(),
+                               binary_search_table.size() * sizeof(binary_search_table[0]));
+    header_section->End();
+  } else {
+    builder->WritePatches(".debug_frame.oat_patches",
+                          ArrayRef<const uintptr_t>(patch_locations));
   }
 }
 
-/*
- * @brief Generate the DWARF sections.
- * @param oat_writer The Oat file Writer.
- * @param eh_frame Call Frame Information.
- * @param debug_info Compilation unit information.
- * @param debug_info_patches Address locations to be patched.
- * @param debug_abbrev Abbreviations used to generate dbg_info.
- * @param debug_str Debug strings.
- * @param debug_line Line number table.
- * @param debug_line_patches Address locations to be patched.
- */
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches) {
-  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
-  const bool is64bit = Is64BitInstructionSet(isa);
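+// A set of methods (grouped by source file) that will be emitted as
+// one DWARF compilation unit, together with the PC range they cover.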
+struct CompilationUnit {
+  std::vector<const MethodDebugInfo*> methods_;
+  size_t debug_line_offset_ = 0;
+  uint32_t low_pc_ = 0xFFFFFFFFU;
+  uint32_t high_pc_ = 0;
+};
 
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    if (mi.deduped_) {
-      deduped_addresses.insert(mi.low_pc_);
+// Helper class to write .debug_info and its supporting sections.
+template<typename ElfTypes>
+class DebugInfoWriter {
+  typedef typename ElfTypes::Addr Elf_Addr;
+
+  // Helper class to write one compilation unit.
+  // It holds helper methods and temporary state.
+  class CompilationUnitWriter {
+   public:
+    explicit CompilationUnitWriter(DebugInfoWriter* owner)
+      : owner_(owner),
+        info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) {
     }
+
+    void Write(const CompilationUnit& compilation_unit) {
+      CHECK(!compilation_unit.methods_.empty());
+      const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+
+      info_.StartTag(DW_TAG_compile_unit);
+      info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+      info_.WriteData1(DW_AT_language, DW_LANG_Java);
+      info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+      info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
+
+      const char* last_dex_class_desc = nullptr;
+      for (auto mi : compilation_unit.methods_) {
+        const DexFile* dex = mi->dex_file_;
+        const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index_);
+        const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
+        const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
+        const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method);
+
+        // Enclose the method in correct class definition.
+        if (last_dex_class_desc != dex_class_desc) {
+          if (last_dex_class_desc != nullptr) {
+            EndClassTag(last_dex_class_desc);
+          }
+          size_t offset = StartClassTag(dex_class_desc);
+          type_cache_.emplace(dex_class_desc, offset);
+          // Check that each class is defined only once.
+          bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
+          CHECK(unique) << "Redefinition of " << dex_class_desc;
+          last_dex_class_desc = dex_class_desc;
+        }
+
+        std::vector<const char*> param_names;
+        if (mi->code_item_ != nullptr) {
+          const uint8_t* stream = dex->GetDebugInfoStream(mi->code_item_);
+          if (stream != nullptr) {
+            DecodeUnsignedLeb128(&stream);  // line.
+            uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+            for (uint32_t i = 0; i < parameters_size; ++i) {
+              uint32_t id = DecodeUnsignedLeb128P1(&stream);
+              param_names.push_back(mi->dex_file_->StringDataByIdx(id));
+            }
+          }
+        }
+
+        int start_depth = info_.Depth();
+        info_.StartTag(DW_TAG_subprogram);
+        WriteName(dex->GetMethodName(dex_method));
+        info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
+        info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+        uint8_t frame_base[] = { DW_OP_call_frame_cfa };
+        info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
+        WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
+        if (dex_params != nullptr) {
+          uint32_t vreg = mi->code_item_ == nullptr ? 0 :
+              mi->code_item_->registers_size_ - mi->code_item_->ins_size_;
+          if ((mi->access_flags_ & kAccStatic) == 0) {
+            info_.StartTag(DW_TAG_formal_parameter);
+            WriteName("this");
+            info_.WriteFlag(DW_AT_artificial, true);
+            WriteLazyType(dex_class_desc);
+            const bool is64bitValue = false;
+            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+            vreg++;
+            info_.EndTag();
+          }
+          for (uint32_t i = 0; i < dex_params->Size(); ++i) {
+            info_.StartTag(DW_TAG_formal_parameter);
+            // Parameter names may not always be available.
+            if (i < param_names.size() && param_names[i] != nullptr) {
+              WriteName(param_names[i]);
+            }
+            // Write the type.
+            const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
+            WriteLazyType(type_desc);
+            // Write the stack location of the parameter.
+            const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
+            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+            vreg += is64bitValue ? 2 : 1;
+            info_.EndTag();
+          }
+          if (mi->code_item_ != nullptr) {
+            CHECK_EQ(vreg, mi->code_item_->registers_size_);
+          }
+        }
+        info_.EndTag();
+        CHECK_EQ(info_.Depth(), start_depth);  // Balanced start/end.
+      }
+      if (last_dex_class_desc != nullptr) {
+        EndClassTag(last_dex_class_desc);
+      }
+      CHECK_EQ(info_.Depth(), 1);
+      FinishLazyTypes();
+      info_.EndTag();  // DW_TAG_compile_unit
+      std::vector<uint8_t> buffer;
+      buffer.reserve(info_.data()->size() + KB);
+      const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+      const size_t debug_abbrev_offset =
+          owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+      WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+      owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+    }
+
+    // Write a table into .debug_loc which describes the location of a dex
+    // register.  The register might be valid only at some points, and it
+    // might move between machine registers and the stack.
+    void WriteRegLocation(const MethodDebugInfo* method_info, uint16_t vreg,
+                          bool is64bitValue, uint32_t compilation_unit_low_pc) {
+      using Kind = DexRegisterLocation::Kind;
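+      // Guess whether the method was compiled by the Optimizing compiler:
+      // it has quick code and a vmap table (stack maps) but no GC map.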
+      bool is_optimizing = method_info->compiled_method_->GetQuickCode().size() > 0 &&
+                           method_info->compiled_method_->GetVmapTable().size() > 0 &&
+                           method_info->compiled_method_->GetGcMap().size() == 0 &&
+                           method_info->code_item_ != nullptr;
+      if (!is_optimizing) {
+        return;
+      }
+
+      Writer<> writer(&owner_->debug_loc_);
+      info_.WriteSecOffset(DW_AT_location, writer.size());
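+      // Each .debug_loc entry is <start, end, expression length, expression>,
+      // with addresses relative to the compilation unit's low_pc;
+      // a <0, 0> pair terminates the list.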
+
+      const InstructionSet isa = owner_->builder_->GetIsa();
+      const bool is64bit = Is64BitInstructionSet(isa);
+      const CodeInfo code_info(method_info->compiled_method_->GetVmapTable().data());
+      const StackMapEncoding encoding = code_info.ExtractEncoding();
+      DexRegisterLocation last_reg_lo = DexRegisterLocation::None();
+      DexRegisterLocation last_reg_hi = DexRegisterLocation::None();
+      size_t offset_of_last_end_address = 0;
+      for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+        StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+        DCHECK(stack_map.IsValid());
+
+        // Find the location of the dex register.
+        DexRegisterLocation reg_lo = DexRegisterLocation::None();
+        DexRegisterLocation reg_hi = DexRegisterLocation::None();
+        if (stack_map.HasDexRegisterMap(encoding)) {
+          DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
+              stack_map, encoding, method_info->code_item_->registers_size_);
+          reg_lo = dex_register_map.GetDexRegisterLocation(
+              vreg, method_info->code_item_->registers_size_, code_info, encoding);
+          if (is64bitValue) {
+            reg_hi = dex_register_map.GetDexRegisterLocation(
+                vreg + 1, method_info->code_item_->registers_size_, code_info, encoding);
+          }
+        }
+        if ((reg_lo == last_reg_lo && reg_hi == last_reg_hi) ||
+            reg_lo.GetKind() == Kind::kNone) {
+          // Skip identical or undefined locations.
+          continue;
+        }
+        last_reg_lo = reg_lo;
+        last_reg_hi = reg_hi;
+
+        // Translate dex register location to DWARF expression.
+        // Note that 64-bit value might be split to two distinct locations.
+        // (for example, two 32-bit machine registers, or even stack and register)
+        uint8_t buffer[64];
+        uint8_t* pos = buffer;
+        for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) {
+          DexRegisterLocation reg_loc = (piece == 0 ? reg_lo : reg_hi);
+          const Kind kind = reg_loc.GetKind();
+          const int32_t value = reg_loc.GetValue();
+          if (kind == Kind::kInStack) {
+            const size_t frame_size = method_info->compiled_method_->GetFrameSizeInBytes();
+            *(pos++) = DW_OP_fbreg;
+            // The stack offset is relative to SP. Make it relative to CFA.
+            pos = EncodeSignedLeb128(pos, value - frame_size);
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
+                reg_hi.GetValue() == value + 4) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInRegister) {
+            pos = WriteOpReg(pos, GetDwarfCoreReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh &&
+                reg_hi.GetValue() == value) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInFpuRegister) {
+            if ((isa == kArm || isa == kThumb2) &&
+                piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister &&
+                reg_hi.GetValue() == value + 1 && value % 2 == 0) {
+              // Translate S register pair to D register (e.g. S4+S5 to D2).
+              pos = WriteOpReg(pos, Reg::ArmDp(value / 2).num());
+              break;
+            }
+            if (isa == kMips || isa == kMips64) {
+              // TODO: Find what the DWARF floating point register numbers are on MIPS.
+              break;
+            }
+            pos = WriteOpReg(pos, GetDwarfFpReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
+                reg_hi.GetValue() == reg_lo.GetValue()) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kConstant) {
+            *(pos++) = DW_OP_consts;
+            pos = EncodeSignedLeb128(pos, value);
+            *(pos++) = DW_OP_stack_value;
+          } else if (kind == Kind::kNone) {
+            break;
+          } else {
+            // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind().
+            // kInRegisterHigh and kInFpuRegisterHigh should be handled by
+            // the special cases above and they should not occur alone.
+            LOG(ERROR) << "Unexpected register location kind: "
+                       << DexRegisterLocation::PrettyDescriptor(kind);
+            break;
+          }
+          if (is64bitValue) {
+            // Write the marker which is needed by split 64-bit values.
+            // This code is skipped by the special cases.
+            *(pos++) = DW_OP_piece;
+            pos = EncodeUnsignedLeb128(pos, 4);
+          }
+        }
+
+        // Write end address for previous entry.
+        const uint32_t pc = method_info->low_pc_ + stack_map.GetNativePcOffset(encoding);
+        if (offset_of_last_end_address != 0) {
+          if (is64bit) {
+            writer.UpdateUint64(offset_of_last_end_address, pc - compilation_unit_low_pc);
+          } else {
+            writer.UpdateUint32(offset_of_last_end_address, pc - compilation_unit_low_pc);
+          }
+        }
+        offset_of_last_end_address = 0;
+
+        DCHECK_LE(static_cast<size_t>(pos - buffer), sizeof(buffer));
+        if (pos > buffer) {
+          // Write start/end address.
+          if (is64bit) {
+            writer.PushUint64(pc - compilation_unit_low_pc);
+            offset_of_last_end_address = writer.size();
+            writer.PushUint64(method_info->high_pc_ - compilation_unit_low_pc);
+          } else {
+            writer.PushUint32(pc - compilation_unit_low_pc);
+            offset_of_last_end_address = writer.size();
+            writer.PushUint32(method_info->high_pc_ - compilation_unit_low_pc);
+          }
+          // Write the expression.
+          writer.PushUint16(pos - buffer);
+          writer.PushData(buffer, pos - buffer);
+        } else {
+          // Otherwise leave the address range undefined.
+        }
+      }
+      // Write end-of-list entry.
+      if (is64bit) {
+        writer.PushUint64(0);
+        writer.PushUint64(0);
+      } else {
+        writer.PushUint32(0);
+        writer.PushUint32(0);
+      }
+    }
+
+    // Some types are difficult to define as we go since they need
+    // to be enclosed in the right set of namespaces. Therefore we
+    // just define all types lazily at the end of the compilation unit.
+    void WriteLazyType(const char* type_descriptor) {
+      DCHECK(type_descriptor != nullptr);
+      if (type_descriptor[0] != 'V') {
+        lazy_types_.emplace(type_descriptor, info_.size());
+        info_.WriteRef4(DW_AT_type, 0);
+      }
+    }
+
+    void FinishLazyTypes() {
+      for (const auto& lazy_type : lazy_types_) {
+        info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      }
+      lazy_types_.clear();
+    }
+
+   private:
+    void WriteName(const char* name) {
+      info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
+    }
+
+    // Helper which writes a DWARF expression referencing a register.
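+    // Registers 0-31 have compact single-byte opcodes (DW_OP_reg0 + n);
+    // higher register numbers use DW_OP_regx followed by a ULEB128 number.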
+    static uint8_t* WriteOpReg(uint8_t* buffer, uint32_t dwarf_reg_num) {
+      if (dwarf_reg_num < 32) {
+        *(buffer++) = DW_OP_reg0 + dwarf_reg_num;
+      } else {
+        *(buffer++) = DW_OP_regx;
+        buffer = EncodeUnsignedLeb128(buffer, dwarf_reg_num);
+      }
+      return buffer;
+    }
+
+    // Convert a dex type descriptor to a DWARF type.
+    // Returns the offset of the type's definition in the compilation unit.
+    size_t WriteType(const char* desc) {
+      const auto& it = type_cache_.find(desc);
+      if (it != type_cache_.end()) {
+        return it->second;
+      }
+
+      size_t offset;
+      if (*desc == 'L') {
+        // Class type. For example: Lpackage/name;
+        offset = StartClassTag(desc);
+        info_.WriteFlag(DW_AT_declaration, true);
+        EndClassTag(desc);
+      } else if (*desc == '[') {
+        // Array type.
+        size_t element_type = WriteType(desc + 1);
+        offset = info_.StartTag(DW_TAG_array_type);
+        info_.WriteRef(DW_AT_type, element_type);
+        info_.EndTag();
+      } else {
+        // Primitive types.
+        const char* name;
+        uint32_t encoding;
+        uint32_t byte_size;
+        switch (*desc) {
+        case 'B':
+          name = "byte";
+          encoding = DW_ATE_signed;
+          byte_size = 1;
+          break;
+        case 'C':
+          name = "char";
+          encoding = DW_ATE_UTF;
+          byte_size = 2;
+          break;
+        case 'D':
+          name = "double";
+          encoding = DW_ATE_float;
+          byte_size = 8;
+          break;
+        case 'F':
+          name = "float";
+          encoding = DW_ATE_float;
+          byte_size = 4;
+          break;
+        case 'I':
+          name = "int";
+          encoding = DW_ATE_signed;
+          byte_size = 4;
+          break;
+        case 'J':
+          name = "long";
+          encoding = DW_ATE_signed;
+          byte_size = 8;
+          break;
+        case 'S':
+          name = "short";
+          encoding = DW_ATE_signed;
+          byte_size = 2;
+          break;
+        case 'Z':
+          name = "boolean";
+          encoding = DW_ATE_boolean;
+          byte_size = 1;
+          break;
+        case 'V':
+          LOG(FATAL) << "Void type should not be encoded";
+          UNREACHABLE();
+        default:
+          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          UNREACHABLE();
+        }
+        offset = info_.StartTag(DW_TAG_base_type);
+        WriteName(name);
+        info_.WriteData1(DW_AT_encoding, encoding);
+        info_.WriteData1(DW_AT_byte_size, byte_size);
+        info_.EndTag();
+      }
+
+      type_cache_.emplace(desc, offset);
+      return offset;
+    }
+
+    // Start a DW_TAG_class_type tag, nested in DW_TAG_namespace tags.
+    // Returns the offset of the class tag in the compilation unit.
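+    // For example, "Ljava/lang/String;" opens DW_TAG_namespace "java"
+    // and DW_TAG_namespace "lang" before the DW_TAG_class_type "String".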
+    size_t StartClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // Enclose the type in namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.StartTag(DW_TAG_namespace);
+        WriteName(std::string(desc, end - desc).c_str());
+      }
+      // Start the class tag.
+      size_t offset = info_.StartTag(DW_TAG_class_type);
+      end = strchr(desc, ';');
+      CHECK(end != nullptr);
+      WriteName(std::string(desc, end - desc).c_str());
+      return offset;
+    }
+
+    void EndClassTag(const char* desc) {
+      DCHECK(desc != nullptr && desc[0] == 'L');
+      // End the class tag.
+      info_.EndTag();
+      // Close namespace tags.
+      const char* end;
+      for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) {
+        info_.EndTag();
+      }
+    }
+
+    // For access to the ELF sections.
+    DebugInfoWriter<ElfTypes>* owner_;
+    // Debug abbrevs for this compilation unit only.
+    std::vector<uint8_t> debug_abbrev_;
+    // Temporary buffer to create and store the entries.
+    DebugInfoEntryWriter<> info_;
+    // Cache of already translated type descriptors.
+    std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+    // 32-bit references which need to be resolved to a type later.
+    std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  };
+
+ public:
+  explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
   }
 
-  // Group the methods into compilation units based on source file.
-  std::vector<std::vector<const OatWriter::DebugInfo*>> compilation_units;
-  const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    // Attribute given instruction range only to single method.
-    // Otherwise the debugger might get really confused.
-    if (!mi.deduped_) {
-      auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
-      const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
-      if (compilation_units.empty() || source_file != last_source_file) {
-        compilation_units.push_back(std::vector<const OatWriter::DebugInfo*>());
-      }
-      compilation_units.back().push_back(&mi);
-      last_source_file = source_file;
-    }
+  void Start() {
+    builder_->GetDebugInfo()->Start();
   }
 
-  // Write .debug_info section.
-  for (const auto& compilation_unit : compilation_units) {
-    uint32_t cunit_low_pc = 0xFFFFFFFFU;
-    uint32_t cunit_high_pc = 0;
-    for (auto method_info : compilation_unit) {
-      cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_);
-      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
-    }
+  void WriteCompilationUnit(const CompilationUnit& compilation_unit) {
+    CompilationUnitWriter writer(this);
+    writer.Write(compilation_unit);
+  }
 
-    size_t debug_abbrev_offset = debug_abbrev->size();
-    DebugInfoEntryWriter<> info(is64bit, debug_abbrev);
-    info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
-    info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line->size());
-    for (auto method_info : compilation_unit) {
-      std::string method_name = PrettyMethod(method_info->dex_method_index_,
-                                             *method_info->dex_file_, true);
-      if (deduped_addresses.find(method_info->low_pc_) != deduped_addresses.end()) {
-        method_name += " [DEDUPED]";
-      }
-      info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
-      info.WriteAddr(DW_AT_low_pc, method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, method_info->high_pc_);
-      info.EndTag();  // DW_TAG_subprogram
-    }
-    info.EndTag();  // DW_TAG_compile_unit
-    WriteDebugInfoCU(debug_abbrev_offset, info, debug_info, debug_info_patches);
+  void End() {
+    builder_->GetDebugInfo()->End();
+    builder_->WritePatches(".debug_info.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_info_patches_));
+    builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
+    builder_->WriteSection(".debug_str", &debug_str_.Data());
+    builder_->WriteSection(".debug_loc", &debug_loc_);
+  }
 
-    // Write .debug_line section.
+ private:
+  size_t WriteString(const char* str) {
+    return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1);
+  }
+
+  ElfBuilder<ElfTypes>* builder_;
+  std::vector<uintptr_t> debug_info_patches_;
+  DedupVector debug_abbrev_;
+  DedupVector debug_str_;
+  std::vector<uint8_t> debug_loc_;
+
+  std::unordered_set<const char*> defined_dex_classes_;  // For CHECKs only.
+};
+
+template<typename ElfTypes>
+class DebugLineWriter {
+  typedef typename ElfTypes::Addr Elf_Addr;
+
+ public:
+  explicit DebugLineWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) {
+  }
+
+  void Start() {
+    builder_->GetDebugLine()->Start();
+  }
+
+  // Write the line table for the given set of methods.
+  // Returns the number of bytes written.
+  size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
+    const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
+    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+
+    compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
+
     std::vector<FileEntry> files;
     std::unordered_map<std::string, size_t> files_map;
     std::vector<std::string> directories;
     std::unordered_map<std::string, size_t> directories_map;
     int code_factor_bits_ = 0;
     int dwarf_isa = -1;
-    switch (isa) {
+    switch (builder_->GetIsa()) {
       case kArm:  // arm actually means thumb2.
       case kThumb2:
        code_factor_bits_ = 1;  // 16-bit instructions
@@ -328,11 +807,17 @@
         break;
     }
     DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(cunit_low_pc);
+    opcodes.SetAddress(text_address + compilation_unit.low_pc_);
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
-    for (const OatWriter::DebugInfo* mi : compilation_unit) {
+    for (const MethodDebugInfo* mi : compilation_unit.methods_) {
+      // Ignore the function if we have already generated a line table for the same address.
+      // That would confuse the debugger, and the DWARF specification forbids it.
+      if (mi->deduped_) {
+        continue;
+      }
+
       struct DebugInfoCallbacks {
         static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
           auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
@@ -342,6 +827,8 @@
         DefaultSrcMap dex2line_;
       } debug_info_callbacks;
 
+      Elf_Addr method_address = text_address + mi->low_pc_;
+
       const DexFile* dex = mi->dex_file_;
       if (mi->code_item_ != nullptr) {
         dex->DecodeDebugInfo(mi->code_item_,
@@ -414,26 +901,97 @@
                 int first_line = dex2line_map.front().to_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
-                opcodes.AddRow(mi->low_pc_, first_line);
+                opcodes.AddRow(method_address, first_line);
                 opcodes.NegateStmt();
                 opcodes.SetPrologueEnd();
               }
-              opcodes.AddRow(mi->low_pc_ + pc, line);
+              opcodes.AddRow(method_address + pc, line);
             } else if (line != opcodes.CurrentLine()) {
-              opcodes.AddRow(mi->low_pc_ + pc, line);
+              opcodes.AddRow(method_address + pc, line);
             }
           }
         }
       } else {
         // line 0 - instruction cannot be attributed to any source line.
-        opcodes.AddRow(mi->low_pc_, 0);
+        opcodes.AddRow(method_address, 0);
       }
     }
-    opcodes.AdvancePC(cunit_high_pc);
+    opcodes.AdvancePC(text_address + compilation_unit.high_pc_);
     opcodes.EndSequence();
-    WriteDebugLineTable(directories, files, opcodes, debug_line, debug_line_patches);
+    std::vector<uint8_t> buffer;
+    buffer.reserve(opcodes.data()->size() + KB);
+    size_t offset = builder_->GetDebugLine()->GetSize();
+    WriteDebugLineTable(directories, files, opcodes, offset, &buffer, &debug_line_patches_);
+    builder_->GetDebugLine()->WriteFully(buffer.data(), buffer.size());
+    return buffer.size();
+  }
+
+  void End() {
+    builder_->GetDebugLine()->End();
+    builder_->WritePatches(".debug_line.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_line_patches_));
+  }
+
+ private:
+  ElfBuilder<ElfTypes>* builder_;
+  std::vector<uintptr_t> debug_line_patches_;
+};
+
+template<typename ElfTypes>
+void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+  // Group the methods into compilation units based on source file.
+  std::vector<CompilationUnit> compilation_units;
+  const char* last_source_file = nullptr;
+  for (const MethodDebugInfo& mi : method_infos) {
+    auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
+    const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
+    if (compilation_units.empty() || source_file != last_source_file) {
+      compilation_units.push_back(CompilationUnit());
+    }
+    CompilationUnit& cu = compilation_units.back();
+    cu.methods_.push_back(&mi);
+    cu.low_pc_ = std::min(cu.low_pc_, mi.low_pc_);
+    cu.high_pc_ = std::max(cu.high_pc_, mi.high_pc_);
+    last_source_file = source_file;
+  }
+
+  // Write .debug_line section.
+  {
+    DebugLineWriter<ElfTypes> line_writer(builder);
+    line_writer.Start();
+    for (auto& compilation_unit : compilation_units) {
+      line_writer.WriteCompilationUnit(compilation_unit);
+    }
+    line_writer.End();
+  }
+
+  // Write .debug_info section.
+  {
+    DebugInfoWriter<ElfTypes> info_writer(builder);
+    info_writer.Start();
+    for (const auto& compilation_unit : compilation_units) {
+      info_writer.WriteCompilationUnit(compilation_unit);
+    }
+    info_writer.End();
   }
 }
 
+// Explicit instantiations
+template void WriteCFISection<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    CFIFormat format);
+template void WriteCFISection<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
+    CFIFormat format);
+template void WriteDebugSections<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
+template void WriteDebugSections<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const ArrayRef<const MethodDebugInfo>& method_infos);
+
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 69f7e0d..9ed102f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,31 +17,22 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include <vector>
-
+#include "elf_builder.h"
 #include "dwarf/dwarf_constants.h"
 #include "oat_writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
 
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches);
+template<typename ElfTypes>
+void WriteCFISection(ElfBuilder<ElfTypes>* builder,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
+                     CFIFormat format);
 
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches);
+template<typename ElfTypes>
+void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                        const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index dce1e86..9da2af8 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,16 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
 #include "utils.h"
 
 namespace art {
@@ -57,241 +54,198 @@
 constexpr bool kGenerateSingleArmMappingSymbol = true;
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
-}
-
-template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
-
-// Encode patch locations as LEB128 list of deltas between consecutive addresses.
-template <typename ElfTypes>
-void ElfWriterQuick<ElfTypes>::EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                                                std::vector<uint8_t>* buffer) {
-  buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
-  uintptr_t address = 0;  // relative to start of section.
-  for (uintptr_t location : locations) {
-    DCHECK_GE(location, address) << "Patch locations are not in sorted order";
-    EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
-    address = location;
-  }
-}
-
-class RodataWriter FINAL : public CodeOutput {
+class ElfWriterQuick FINAL : public ElfWriter {
  public:
-  explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
 
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteRodata(out);
-  }
+  void Start() OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void SetBssSize(size_t bss_size) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
 
  private:
-  OatWriter* oat_writer_;
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
 };
 
-class TextWriter FINAL : public CodeOutput {
- public:
-  explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
-
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteCode(out);
-  }
-
- private:
-  OatWriter* oat_writer_;
-};
-
-enum PatchResult {
-  kAbsoluteAddress,  // Absolute memory location.
-  kPointerRelativeAddress,  // Offset relative to the location of the pointer.
-  kSectionRelativeAddress,  // Offset relative to start of containing section.
-};
-
-// Patch memory addresses within a buffer.
-// It assumes that the unpatched addresses are offsets relative to base_address.
-// (which generally means method's low_pc relative to the start of .text)
-template <typename Elf_Addr, typename Address, PatchResult kPatchResult>
-static void Patch(const std::vector<uintptr_t>& patch_locations,
-                  Elf_Addr buffer_address, Elf_Addr base_address,
-                  std::vector<uint8_t>* buffer) {
-  for (uintptr_t location : patch_locations) {
-    typedef __attribute__((__aligned__(1))) Address UnalignedAddress;
-    auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location);
-    switch (kPatchResult) {
-      case kAbsoluteAddress:
-        *to_patch = (base_address + *to_patch);
-        break;
-      case kPointerRelativeAddress:
-        *to_patch = (base_address + *to_patch) - (buffer_address + location);
-        break;
-      case kSectionRelativeAddress:
-        *to_patch = (base_address + *to_patch) - buffer_address;
-        break;
-    }
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
   }
 }
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  using Elf_Addr = typename ElfTypes::Addr;
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
 
-  // Setup the builder with the main OAT sections (.rodata .text .bss).
-  const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-  const size_t text_size = oat_writer->GetSize() - rodata_size;
-  const size_t bss_size = oat_writer->GetBssSize();
-  RodataWriter rodata_writer(oat_writer);
-  TextWriter text_writer(oat_writer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(
-      isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size));
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
 
-  // Add debug sections.
-  // They are allocated here (in the same scope as the builder),
-  // but they are registered with the builder only if they are used.
-  using RawSection = typename ElfBuilder<ElfTypes>::RawSection;
-  const auto* text = builder->GetText();
-  const bool is64bit = Is64BitInstructionSet(isa);
-  const int pointer_size = GetInstructionSetPointerSize(isa);
-  std::unique_ptr<RawSection> eh_frame(new RawSection(
-      ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> :
-                Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>,
-      text));
-  std::unique_ptr<RawSection> eh_frame_hdr(new RawSection(
-      ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0,
-      Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text));
-  std::unique_ptr<RawSection> debug_frame(new RawSection(
-      ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, pointer_size, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kAbsoluteAddress> :
-                Patch<Elf_Addr, uint32_t, kAbsoluteAddress>,
-      text));
-  std::unique_ptr<RawSection> debug_frame_oat_patches(new RawSection(
-      ".debug_frame.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_info(new RawSection(
-      ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_info_oat_patches(new RawSection(
-      ".debug_info.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_abbrev(new RawSection(
-      ".debug_abbrev", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_str(new RawSection(
-      ".debug_str", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_line(new RawSection(
-      ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_line_oat_patches(new RawSection(
-      ".debug_line.oat_patches", SHT_OAT_PATCH));
-  if (!oat_writer->GetMethodDebugInfo().empty()) {
-    if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-      // Generate CFI (stack unwinding information).
-      if (kCFIFormat == dwarf::DW_EH_FRAME_FORMAT) {
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_pcrel, kCFIFormat,
-            eh_frame->GetBuffer(), eh_frame->GetPatchLocations(),
-            eh_frame_hdr->GetBuffer(), eh_frame_hdr->GetPatchLocations());
-        builder->RegisterSection(eh_frame.get());
-        builder->RegisterSection(eh_frame_hdr.get());
-      } else {
-        DCHECK(kCFIFormat == dwarf::DW_DEBUG_FRAME_FORMAT);
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_absptr, kCFIFormat,
-            debug_frame->GetBuffer(), debug_frame->GetPatchLocations(),
-            nullptr, nullptr);
-        builder->RegisterSection(debug_frame.get());
-        EncodeOatPatches(*debug_frame->GetPatchLocations(),
-                         debug_frame_oat_patches->GetBuffer());
-        builder->RegisterSection(debug_frame_oat_patches.get());
-      }
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
+  rodata->Start();
+  return rodata;
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
+  text->Start();
+  return text;
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
+  auto* bss = builder_->GetBss();
+  if (bss_size != 0u) {
+    bss->Start();
+    bss->SetSize(bss_size);
+    bss->End();
+  }
+}
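SetBssSize is deliberately asymmetric with the .rodata and .text streams: .bss contributes no file contents, so the section is opened, sized, and closed without any writes. Assuming ElfBuilder models .bss as an SHT_NOBITS section (standard ELF practice for .bss), the bookkeeping amounts to something like this sketch:

#include <cstdint>

// SHT_NOBITS sketch: the section owns zero-initialized address space, not file bytes.
struct SectionHeaderSketch {
  uint32_t type;    // 8 == SHT_NOBITS for .bss.
  uint64_t offset;  // File offset; no payload follows it for NOBITS.
  uint64_t size;    // Memory size the loader zero-fills at load time.
};

SectionHeaderSketch MakeBss(uint64_t bss_size, uint64_t end_of_file_data) {
  // The loader reserves 'bss_size' zeroed bytes; the file itself does not grow.
  return SectionHeaderSketch{/*type=*/8u, /*offset=*/end_of_file_data, /*size=*/bss_size};
}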
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  builder_->WriteDynamicSection(elf_file_->GetPath());
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    if (!method_infos.empty()) {
       // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate DWARF .debug_* sections.
-      dwarf::WriteDebugSections(
-          compiler_driver_, oat_writer,
-          debug_info->GetBuffer(), debug_info->GetPatchLocations(),
-          debug_abbrev->GetBuffer(),
-          debug_str->GetBuffer(),
-          debug_line->GetBuffer(), debug_line->GetPatchLocations());
-      builder->RegisterSection(debug_info.get());
-      EncodeOatPatches(*debug_info->GetPatchLocations(),
-                       debug_info_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_info_oat_patches.get());
-      builder->RegisterSection(debug_abbrev.get());
-      builder->RegisterSection(debug_str.get());
-      builder->RegisterSection(debug_line.get());
-      EncodeOatPatches(*debug_line->GetPatchLocations(),
-                       debug_line_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_line_oat_patches.get());
+      WriteDebugSymbols(builder_.get(), method_infos);
+      // Generate CFI (stack unwinding information).
+      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
+      // Write DWARF .debug_* sections.
+      dwarf::WriteDebugSections(builder_.get(), method_infos);
     }
   }
+}
 
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
   // Add relocation section for .text.
-  std::unique_ptr<RawSection> text_oat_patches(new RawSection(
-      ".text.oat_patches", SHT_OAT_PATCH));
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
+  if (compiler_options_->GetIncludePatchInformation()) {
     // Note that ElfWriter::Fixup will be called regardless and therefore
     // we need to include oat_patches for debug sections unconditionally.
-    EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(),
-                     text_oat_patches->GetBuffer());
-    builder->RegisterSection(text_oat_patches.get());
+    builder_->WritePatches(".text.oat_patches", patch_locations);
   }
-
-  return builder->Write(elf_file_);
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
+
+  return builder_->Good();
+}
+
+template <typename ElfTypes>
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of method is not marked deduped_, but the rest is.
   std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
     }
   }
 
-  auto* symtab = builder->GetSymtab();
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
+  strtab->Start();
+  strtab->Write("");  // strtab should start with empty string.
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
     }
 
-    uint32_t low_pc = it->low_pc_;
+    uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, builder->GetText(), low_pc,
-                      true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+    low_pc += info.compiled_method_->CodeDelta();
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
-                          0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
   }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
 }
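The comment before the last three calls records the design choice: symbol entries are small and fixed-size, so they are buffered while the larger name strings stream out first, avoiding a second pass over the methods. A toy version of the strtab-offset indirection involved, with hypothetical StringTable and Symbol types rather than ElfBuilder's real sections:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Toy string table: Add() returns the offset at which the string was stored.
struct StringTable {
  std::vector<char> data{'\0'};  // An ELF strtab starts with an empty string.
  uint32_t Add(const std::string& s) {
    uint32_t offset = static_cast<uint32_t>(data.size());
    data.insert(data.end(), s.begin(), s.end());
    data.push_back('\0');
    return offset;
  }
};

// Toy symbol: carries its name as a strtab offset, as real ELF symbols do.
struct Symbol {
  uint32_t name_offset;
  uint32_t value;
};

// Stream the (large) names out immediately; buffer the (small) fixed-size symbols.
void BuildTables(const std::vector<std::pair<std::string, uint32_t>>& functions,
                 StringTable* strtab,
                 std::vector<Symbol>* symtab) {
  for (const auto& f : functions) {
    symtab->push_back(Symbol{strtab->Add(f.first), f.second});
  }
}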
 
 // Explicit instantiations
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..347d372 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,19 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index ccf34b8..7cf774e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -21,6 +21,7 @@
 #include "common_compiler_test.h"
 #include "elf_file.h"
 #include "elf_file_impl.h"
+#include "elf_builder.h"
 #include "elf_writer_quick.h"
 #include "oat.h"
 #include "utils.h"
@@ -100,7 +101,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfWriterQuick32::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/file_output_stream.cc b/compiler/file_output_stream.cc
index 3ee16f5..bbfbdfd 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/file_output_stream.cc
@@ -33,4 +33,8 @@
   return lseek(file_->Fd(), offset, static_cast<int>(whence));
 }
 
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
 }  // namespace art
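For context on why Flush() is added here: elf_writer_quick.cc above now wraps the ELF file in a BufferedOutputStream, and a buffered wrapper has to drain its own buffer before forwarding the flush to the file. A sketch under that assumption (illustrative class, not ART's actual BufferedOutputStream):

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical buffered wrapper over an OutputStream.
class BufferedStreamSketch {
 public:
  explicit BufferedStreamSketch(OutputStream* out) : out_(out) {}

  bool WriteFully(const void* buffer, size_t byte_count) {
    const uint8_t* bytes = static_cast<const uint8_t*>(buffer);
    buffer_.insert(buffer_.end(), bytes, bytes + byte_count);
    return buffer_.size() < kBufferCapacity || FlushBuffer();
  }

  // Drain the in-memory buffer, then forward so the data reaches the OS-level
  // file (FileOutputStream::Flush above calls File::Flush).
  bool Flush() {
    return FlushBuffer() && out_->Flush();
  }

 private:
  bool FlushBuffer() {
    bool ok = buffer_.empty() || out_->WriteFully(buffer_.data(), buffer_.size());
    buffer_.clear();
    return ok;
  }

  static constexpr size_t kBufferCapacity = 8u * 1024u;
  OutputStream* const out_;
  std::vector<uint8_t> buffer_;
};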
diff --git a/compiler/file_output_stream.h b/compiler/file_output_stream.h
index 9dfbd7f..6917d83 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/file_output_stream.h
@@ -27,11 +27,13 @@
  public:
   explicit FileOutputStream(File* file);
 
-  virtual ~FileOutputStream() {}
+  ~FileOutputStream() OVERRIDE {}
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   File* const file_;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index fd6cd82..5f4a922 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,9 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -64,8 +66,10 @@
   ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
 
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, requested_image_base,
-                                                      /*compile_pic*/false));
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
+                                                      requested_image_base,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false));
   // TODO: compile_pic should be a test argument.
   {
     {
@@ -81,14 +85,46 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(),
-                           writer.get(), &timings, &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      OatWriter oat_writer(class_linker->GetBootClassPath(),
+                           0,
+                           0,
+                           0,
+                           compiler_driver_.get(),
+                           writer.get(),
+                           /*compiling_boot_image*/true,
+                           &timings,
+                           &key_value_store);
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      bool success = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(success);
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      success = elf_writer->End();
+
       ASSERT_TRUE(success);
     }
   }
@@ -172,7 +208,7 @@
   ASSERT_TRUE(heap->HasImageSpace());
   ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
   ASSERT_TRUE(image_space != nullptr);
   ASSERT_LE(image_space->Size(), image_file_size);
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index af2a4f9..341742e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -20,6 +20,7 @@
 
 #include <memory>
 #include <numeric>
+#include <unordered_set>
 #include <vector>
 
 #include "art_field-inl.h"
@@ -72,6 +73,27 @@
 // Separate objects into multiple bins to optimize dirty memory use.
 static constexpr bool kBinObjects = true;
 
+// Return true if an object is already in an image space.
+bool ImageWriter::IsInBootImage(const void* obj) const {
+  if (!compile_app_image_) {
+    DCHECK(boot_image_space_ == nullptr);
+    return false;
+  }
+  const uint8_t* image_begin = boot_image_space_->Begin();
+  // Real image end including ArtMethods and ArtField sections.
+  const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
+  return image_begin <= obj && obj < image_end;
+}
+
+bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  if (!compile_app_image_) {
+    DCHECK(boot_image_space_ == nullptr);
+    return false;
+  }
+  const ImageHeader& image_header = boot_image_space_->GetImageHeader();
+  return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+}
+
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* klass = obj->GetClass();
@@ -85,12 +107,20 @@
 
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  // Cache boot image space.
+  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      CHECK(compile_app_image_);
+      CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
+      boot_image_space_ = space->AsImageSpace();
+    }
+  }
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
   }
-  gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);  // Remove garbage.
 
   // Dex caches must not have their dex fields set in the image. These are memory buffers of mapped
@@ -144,21 +174,21 @@
   Runtime::Current()->GetOatFileManager().RegisterOatFile(
       std::unique_ptr<const OatFile>(oat_file_));
 
-  interpreter_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset();
-  interpreter_to_compiled_code_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToCompiledCodeBridgeOffset();
-
-  jni_dlsym_lookup_offset_ = oat_file_->GetOatHeader().GetJniDlsymLookupOffset();
-
-  quick_generic_jni_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickGenericJniTrampolineOffset();
-  quick_imt_conflict_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickImtConflictTrampolineOffset();
-  quick_resolution_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset();
-  quick_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetQuickToInterpreterBridgeOffset();
+  const OatHeader& oat_header = oat_file_->GetOatHeader();
+  oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
+      oat_header.GetInterpreterToInterpreterBridgeOffset();
+  oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
+      oat_header.GetInterpreterToCompiledCodeBridgeOffset();
+  oat_address_offsets_[kOatAddressJNIDlsymLookup] =
+      oat_header.GetJniDlsymLookupOffset();
+  oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
+      oat_header.GetQuickGenericJniTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
+      oat_header.GetQuickImtConflictTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
+      oat_header.GetQuickResolutionTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
+      oat_header.GetQuickToInterpreterBridgeOffset();
 
   size_t oat_loaded_size = 0;
   size_t oat_data_offset = 0;
@@ -300,37 +330,44 @@
 }
 
 void ImageWriter::PrepareDexCacheArraySlots() {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Thread* const self = Thread::Current();
-  ReaderMutexLock mu(self, *class_linker->DexLock());
+  // Prepare dex cache array starts based on the ordering specified in the CompilerDriver.
   uint32_t size = 0u;
-  for (jobject weak_root : class_linker->GetDexCaches()) {
-    mirror::DexCache* dex_cache =
-        down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache == nullptr) {
-      continue;
-    }
-    const DexFile* dex_file = dex_cache->GetDexFile();
+  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
     dex_cache_array_starts_.Put(dex_file, size);
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
-    DCHECK(layout.Valid());
-    DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
-    DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
-    DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
-    DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset());
     size += layout.Size();
   }
   // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
   // when AssignImageBinSlot() assigns their indexes out of order.
   bin_slot_sizes_[kBinDexCacheArray] = size;
+
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* const self = Thread::Current();
+  ReaderMutexLock mu(self, *class_linker->DexLock());
+  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+    mirror::DexCache* dex_cache =
+        down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+    if (dex_cache == nullptr || IsInBootImage(dex_cache)) {
+      continue;
+    }
+    const DexFile* dex_file = dex_cache->GetDexFile();
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
+    DCHECK(layout.Valid());
+    uint32_t start = dex_cache_array_starts_.Get(dex_file);
+    DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset());
+    DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset());
+    DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset());
+    DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
+    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset());
+  }
 }
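The reordering splits the work in two passes: the first loop pins each dex file's start offset using the CompilerDriver's dex file ordering (so the layout no longer depends on which dex caches happen to be loaded), and the second loop places each non-boot-image dex cache's four arrays at its start plus the layout's per-array offset. A schematic of the first pass, with LayoutSketch standing in for DexCacheArraysLayout:

#include <cstdint>
#include <vector>

// Illustrative stand-in for DexCacheArraysLayout: per-array offsets plus total size.
struct LayoutSketch {
  uint32_t types_offset;
  uint32_t methods_offset;
  uint32_t fields_offset;
  uint32_t strings_offset;
  uint32_t size;
};

// Pass 1: accumulate each dex file's start offset within the kBinDexCacheArray bin.
std::vector<uint32_t> ComputeStarts(const std::vector<LayoutSketch>& layouts) {
  std::vector<uint32_t> starts;
  uint32_t size = 0u;
  for (const LayoutSketch& layout : layouts) {
    starts.push_back(size);  // This dex file's arrays begin here.
    size += layout.size;     // The next dex file starts after all four arrays.
  }
  return starts;
}
// Pass 2 (above) then relocates, e.g., resolved types to starts[i] + layouts[i].types_offset.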
 
 void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
   if (array != nullptr) {
+    DCHECK(!IsInBootImage(array));
     native_object_relocations_.emplace(
         array,
         NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray });
@@ -341,11 +378,11 @@
   DCHECK(arr != nullptr);
   if (kIsDebugBuild) {
     for (size_t i = 0, len = arr->GetLength(); i < len; i++) {
-      auto* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_);
+      ArtMethod* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_);
       if (method != nullptr && !method->IsRuntimeMethod()) {
-        auto* klass = method->GetDeclaringClass();
-        CHECK(klass == nullptr || IsImageClass(klass)) << PrettyClass(klass)
-            << " should be an image class";
+        mirror::Class* klass = method->GetDeclaringClass();
+        CHECK(klass == nullptr || KeepClass(klass))
+            << PrettyClass(klass) << " should be a kept class";
       }
     }
   }
@@ -483,7 +520,7 @@
     size_t offset = lock_word.ForwardingAddress();
     BinSlot bin_slot(offset);
     DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()])
-      << "bin slot offset should not exceed the size of that bin";
+        << "bin slot offset should not exceed the size of that bin";
   }
   return true;
 }
@@ -506,8 +543,13 @@
   const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
                                 kPageSize);
   std::string error_msg;
-  image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
-                                    false, false, &error_msg));
+  image_.reset(MemMap::MapAnonymous("image writer image",
+                                    nullptr,
+                                    length,
+                                    PROT_READ | PROT_WRITE,
+                                    false,
+                                    false,
+                                    &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
@@ -516,7 +558,9 @@
   // Create the image bitmap, only needs to cover mirror object section which is up to image_end_.
   CHECK_LE(image_end_, length);
   image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create(
-      "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize)));
+      "image bitmap",
+      image_->Begin(),
+      RoundUp(image_end_, kPageSize)));
   if (image_bitmap_.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate memory for image bitmap";
     return false;
@@ -539,10 +583,107 @@
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-bool ImageWriter::IsImageClass(Class* klass) {
+static bool IsBootClassLoaderClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return klass->GetClassLoader() == nullptr;
+}
+
+bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
+}
+
+bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  bool early_exit = false;
+  std::unordered_set<mirror::Class*> visited;
+  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+}
+
+bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+    mirror::Class* klass,
+    bool* early_exit,
+    std::unordered_set<mirror::Class*>* visited) {
+  DCHECK(early_exit != nullptr);
+  DCHECK(visited != nullptr);
   if (klass == nullptr) {
     return false;
   }
+  auto found = prune_class_memo_.find(klass);
+  if (found != prune_class_memo_.end()) {
+    // Already computed, return the found value.
+    return found->second;
+  }
+  // Circular dependency: return false, but do not store the result in the memoization table.
+  if (visited->find(klass) != visited->end()) {
+    *early_exit = true;
+    return false;
+  }
+  visited->emplace(klass);
+  bool result = IsBootClassLoaderNonImageClass(klass);
+  bool my_early_exit = false;  // Only for ourselves, ignore caller.
+  if (!result) {
+    // Check interfaces since these won't be visited through VisitReferences.
+    mirror::IfTable* if_table = klass->GetIfTable();
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      result = result || ContainsBootClassLoaderNonImageClassInternal(
+          if_table->GetInterface(i),
+          &my_early_exit,
+          visited);
+    }
+  }
+  // Check static fields and their classes.
+  size_t num_static_fields = klass->NumReferenceStaticFields();
+  if (num_static_fields != 0 && klass->IsResolved()) {
+    // Presumably GC can happen when we are cross compiling, so it should not cause
+    // performance problems to look up the pointer size here.
+    MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset(
+        Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    for (size_t i = 0u; i < num_static_fields; ++i) {
+      mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
+      if (ref != nullptr) {
+        if (ref->IsClass()) {
+          result = result ||
+                   ContainsBootClassLoaderNonImageClassInternal(
+                       ref->AsClass(),
+                       &my_early_exit,
+                       visited);
+        }
+        result = result ||
+                 ContainsBootClassLoaderNonImageClassInternal(
+                     ref->GetClass(),
+                     &my_early_exit,
+                     visited);
+      }
+      field_offset = MemberOffset(field_offset.Uint32Value() +
+                                  sizeof(mirror::HeapReference<mirror::Object>));
+    }
+  }
+  result = result ||
+           ContainsBootClassLoaderNonImageClassInternal(
+               klass->GetSuperClass(),
+               &my_early_exit,
+               visited);
+  // Erase the element we stored earlier since we are exiting the function.
+  auto it = visited->find(klass);
+  DCHECK(it != visited->end());
+  visited->erase(it);
+  // Only store the result if it is true or none of the calls early exited due to circular
+  // dependencies. If visited is empty then we are the root caller; in that case the cycle was
+  // in a child call and we can remember the result.
+  if (result == true || !my_early_exit || visited->empty()) {
+    prune_class_memo_[klass] = result;
+  }
+  *early_exit |= my_early_exit;
+  return result;
+}
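The caching rule at the end is the subtle part: an answer computed while a cycle was still open on the DFS stack is provisional, so it is memoized only when it is true, when no cycle was actually cut, or when the traversal has unwound back to the root (visited is empty again). The same pattern in miniature, for flag-reachability in a possibly cyclic graph (hypothetical Node type):

#include <unordered_map>
#include <unordered_set>
#include <vector>

struct Node {
  bool flagged = false;
  std::vector<Node*> children;
};

// Returns whether 'node' can reach any flagged node. 'memo' persists across calls,
// 'visited' tracks the current DFS path, 'early_exit' reports that a cycle was cut.
bool ReachesFlagged(Node* node,
                    std::unordered_map<Node*, bool>* memo,
                    std::unordered_set<Node*>* visited,
                    bool* early_exit) {
  auto found = memo->find(node);
  if (found != memo->end()) {
    return found->second;  // Already computed.
  }
  if (visited->count(node) != 0) {
    *early_exit = true;  // Cycle: the answer is incomplete, do not memoize it.
    return false;
  }
  visited->insert(node);
  bool result = node->flagged;
  bool my_early_exit = false;  // Only for ourselves, ignore the caller.
  for (Node* child : node->children) {
    result = result || ReachesFlagged(child, memo, visited, &my_early_exit);
  }
  visited->erase(node);
  // Cache only definitive answers: true, computed without cutting a cycle,
  // or computed at the root (visited has emptied out again).
  if (result || !my_early_exit || visited->empty()) {
    (*memo)[node] = result;
  }
  *early_exit |= my_early_exit;
  return result;
}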
+
+bool ImageWriter::KeepClass(Class* klass) {
+  if (klass == nullptr) {
+    return false;
+  }
+  if (compile_app_image_) {
+    // For app images, we need to prune boot loader classes that are not in the boot image since
+    // these may have already been loaded when the app image is loaded.
+    return !ContainsBootClassLoaderNonImageClass(klass);
+  }
   std::string temp;
   return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
 }
@@ -552,21 +693,17 @@
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!image_writer_->IsImageClass(klass)) {
-      std::string temp;
-      non_image_classes_.insert(klass->GetDescriptor(&temp));
+    if (!image_writer_->KeepClass(klass)) {
+      classes_to_prune_.insert(klass);
     }
     return true;
   }
 
-  std::set<std::string> non_image_classes_;
+  std::unordered_set<mirror::Class*> classes_to_prune_;
   ImageWriter* const image_writer_;
 };
 
 void ImageWriter::PruneNonImageClasses() {
-  if (compiler_driver_.GetImageClasses() == nullptr) {
-    return;
-  }
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
@@ -576,8 +713,14 @@
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
-  for (const std::string& it : visitor.non_image_classes_) {
-    bool result = class_linker->RemoveClass(it.c_str(), nullptr);
+  for (mirror::Class* klass : visitor.classes_to_prune_) {
+    std::string temp;
+    const char* name = klass->GetDescriptor(&temp);
+    VLOG(compiler) << "Pruning class " << name;
+    if (!compile_app_image_) {
+      DCHECK(IsBootClassLoaderClass(klass));
+    }
+    bool result = class_linker->RemoveClass(name, klass->GetClassLoader());
     DCHECK(result);
   }
 
@@ -587,14 +730,14 @@
   ScopedAssertNoThreadSuspension sa(self, __FUNCTION__);
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
   ReaderMutexLock mu2(self, *class_linker->DexLock());
-  for (jobject weak_root : class_linker->GetDexCaches()) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
     if (dex_cache == nullptr) {
       continue;
     }
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
-      if (klass != nullptr && !IsImageClass(klass)) {
+      if (klass != nullptr && !KeepClass(klass)) {
         dex_cache->SetResolvedType(i, nullptr);
       }
     }
@@ -607,7 +750,7 @@
         // Miranda methods may be held live by a class which was not an image class but have a
         // declaring class which is an image class. Set it to the resolution method to be safe and
         // prevent dangling pointers.
-        if (method->IsMiranda() || !IsImageClass(declaring_class)) {
+        if (method->IsMiranda() || !KeepClass(declaring_class)) {
           mirror::DexCache::SetElementPtrSize(resolved_methods,
                                               i,
                                               resolution_method,
@@ -621,7 +764,7 @@
     }
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
       ArtField* field = dex_cache->GetResolvedField(i, target_ptr_size_);
-      if (field != nullptr && !IsImageClass(field->GetDeclaringClass())) {
+      if (field != nullptr && !KeepClass(field->GetDeclaringClass())) {
         dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
       }
     }
@@ -632,6 +775,9 @@
 
   // Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
   class_linker->DropFindArrayClassCache();
+
+  // Clear to save RAM.
+  prune_class_memo_.clear();
 }
 
 void ImageWriter::CheckNonImageClassesRemoved() {
@@ -643,13 +789,13 @@
 
 void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) {
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-  if (obj->IsClass()) {
+  if (obj->IsClass() && !image_writer->IsInBootImage(obj)) {
     Class* klass = obj->AsClass();
-    if (!image_writer->IsImageClass(klass)) {
+    if (!image_writer->KeepClass(klass)) {
       image_writer->DumpImageClasses();
       std::string temp;
-      CHECK(image_writer->IsImageClass(klass)) << klass->GetDescriptor(&temp)
-                                               << " " << PrettyDescriptor(klass);
+      CHECK(image_writer->KeepClass(klass)) << klass->GetDescriptor(&temp)
+                                            << " " << PrettyDescriptor(klass);
     }
   }
 }
@@ -703,25 +849,38 @@
   // ObjectArray, we lock the dex lock twice, first to get the number
   // of dex caches and then again to copy the dex
   // caches. We check that the number of dex caches does not change.
-  size_t dex_cache_count;
+  size_t dex_cache_count = 0;
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    dex_cache_count = class_linker->GetDexCacheCount();
+    // Count number of dex caches not in the boot image.
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      dex_cache_count += IsInBootImage(dex_cache) ? 0u : 1u;
+    }
   }
   Handle<ObjectArray<Object>> dex_caches(
-      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(),
-                                              dex_cache_count)));
+      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count)));
   CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array.";
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount())
-        << "The number of dex caches changed.";
-    size_t i = 0;
-    for (jobject weak_root : class_linker->GetDexCaches()) {
+    size_t non_image_dex_caches = 0;
+    // Re-count the number of non-image dex caches.
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
       mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-      dex_caches->Set<false>(i, dex_cache);
-      ++i;
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      non_image_dex_caches += IsInBootImage(dex_cache) ? 0u : 1u;
+    }
+    CHECK_EQ(dex_cache_count, non_image_dex_caches)
+        << "The number of non-image dex caches changed.";
+    size_t i = 0;
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (!IsInBootImage(dex_cache)) {
+        dex_caches->Set<false>(i, dex_cache);
+        ++i;
+      }
     }
   }
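This keeps the pre-existing count/allocate/recount discipline described in the comment above, now counting only dex caches outside the boot image. Schematically, with a std::mutex standing in for ClassLinker::DexLock() and an opaque predicate for IsInBootImage:

#include <cstddef>
#include <functional>
#include <mutex>
#include <vector>

// Schematic of the count / allocate / recount-and-fill pattern.
std::vector<void*> SnapshotCaches(const std::vector<void*>& caches,
                                  std::mutex& mu,
                                  const std::function<bool(void*)>& in_boot_image) {
  size_t count = 0;
  {
    std::lock_guard<std::mutex> lock(mu);
    for (void* cache : caches) {
      count += in_boot_image(cache) ? 0u : 1u;
    }
  }
  // Allocate with the lock released (the real code allocates a managed array,
  // which can suspend the thread).
  std::vector<void*> snapshot;
  snapshot.reserve(count);
  {
    std::lock_guard<std::mutex> lock(mu);
    size_t recount = 0;
    for (void* cache : caches) {
      recount += in_boot_image(cache) ? 0u : 1u;
    }
    if (recount != count) {
      return {};  // The real code CHECKs that the count did not change.
    }
    for (void* cache : caches) {
      if (!in_boot_image(cache)) {
        snapshot.push_back(cache);
      }
    }
  }
  return snapshot;
}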
 
@@ -761,6 +920,10 @@
 
 // For an unvisited object, visit it then all its children found via fields.
 void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
+  if (IsInBootImage(obj)) {
+    // Object is in the image, don't need to fix it up.
+    return;
+  }
   // Use our own visitor routine (instead of GC visitor) to get better locality between
   // an object and its fields
   if (!IsImageBinSlotAssigned(obj)) {
@@ -797,9 +960,10 @@
           CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields
                                                   << " already forwarded";
           size_t& offset = bin_slot_sizes_[kBinArtField];
+          DCHECK(!IsInBootImage(cur_fields));
           native_object_relocations_.emplace(
-              cur_fields, NativeObjectRelocation {
-                  offset, kNativeObjectRelocationTypeArtFieldArray });
+              cur_fields,
+              NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtFieldArray });
           offset += header_size;
           // Forward individual fields so that we can quickly find where they belong.
           for (size_t i = 0, count = cur_fields->size(); i < count; ++i) {
@@ -808,8 +972,10 @@
             auto it2 = native_object_relocations_.find(field);
             CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i
                 << " already assigned " << PrettyField(field) << " static=" << field->IsStatic();
+            DCHECK(!IsInBootImage(field));
             native_object_relocations_.emplace(
-                field, NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField });
+                field,
+                NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField });
             offset += sizeof(ArtField);
           }
         }
@@ -832,8 +998,9 @@
           any_dirty = any_dirty || WillMethodBeDirty(&m);
           ++count;
         }
-        NativeObjectRelocationType type = any_dirty ? kNativeObjectRelocationTypeArtMethodDirty :
-            kNativeObjectRelocationTypeArtMethodClean;
+        NativeObjectRelocationType type = any_dirty
+            ? kNativeObjectRelocationTypeArtMethodDirty
+            : kNativeObjectRelocationTypeArtMethodClean;
         Bin bin_type = BinTypeForNativeRelocationType(type);
         // Forward the entire array at once, but header first.
         const size_t header_size = LengthPrefixedArray<ArtMethod>::ComputeSize(0,
@@ -843,6 +1010,7 @@
         CHECK(it == native_object_relocations_.end()) << "Method array " << array
             << " already forwarded";
         size_t& offset = bin_slot_sizes_[bin_type];
+        DCHECK(!IsInBootImage(array));
         native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
             any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
                 kNativeObjectRelocationTypeArtMethodArrayClean });
@@ -867,6 +1035,7 @@
 }
 
 void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) {
+  DCHECK(!IsInBootImage(method));
   auto it = native_object_relocations_.find(method);
   CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned "
       << PrettyMethod(method);
@@ -884,10 +1053,13 @@
 void ImageWriter::UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) {
   ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
   DCHECK(writer != nullptr);
-  writer->UnbinObjectsIntoOffset(obj);
+  if (!writer->IsInBootImage(obj)) {
+    writer->UnbinObjectsIntoOffset(obj);
+  }
 }
 
 void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) {
+  DCHECK(!IsInBootImage(obj));
   CHECK(obj != nullptr);
 
   // We know the bin slot, and the total bin sizes for all objects by now,
@@ -925,13 +1097,15 @@
   image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
       runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
 
-  // Add room for fake length prefixed array.
+  // Add room for fake length prefixed array for holding the image methods.
   const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean;
   auto it = native_object_relocations_.find(&image_method_array_);
   CHECK(it == native_object_relocations_.end());
   size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
-  native_object_relocations_.emplace(&image_method_array_,
-                                     NativeObjectRelocation { offset, image_method_type });
+  if (!compile_app_image_) {
+    native_object_relocations_.emplace(&image_method_array_,
+                                       NativeObjectRelocation { offset, image_method_type });
+  }
   size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
   const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
       0, ArtMethod::Size(target_ptr_size_), method_alignment);
@@ -940,7 +1114,10 @@
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
     CHECK(m->IsRuntimeMethod());
-    AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+    DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image";
+    if (!IsInBootImage(m)) {
+      AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+    }
   }
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
@@ -1006,8 +1183,9 @@
   cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
   // Add method section.
   auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
-  *methods_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtMethodClean] +
-                                  bin_slot_sizes_[kBinArtMethodDirty]);
+  *methods_section = ImageSection(cur_pos,
+                                  bin_slot_sizes_[kBinArtMethodClean] +
+                                      bin_slot_sizes_[kBinArtMethodDirty]);
   CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
   cur_pos = methods_section->End();
   // Add dex cache arrays section.
@@ -1038,12 +1216,17 @@
   CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
       "Oat file should be right after the image.";
   // Create the header.
-  new (image_->Begin()) ImageHeader(
-      PointerToLowMemUInt32(image_begin_), image_end,
-      sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(),
-      PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_),
-      PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_,
-      compile_pic_);
+  new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_),
+                                                          image_end,
+                                                          sections,
+                                                          image_roots_address_,
+                                                          oat_file_->GetOatHeader().GetChecksum(),
+                                                          PointerToLowMemUInt32(oat_file_begin),
+                                                          PointerToLowMemUInt32(oat_data_begin_),
+                                                          PointerToLowMemUInt32(oat_data_end),
+                                                          PointerToLowMemUInt32(oat_file_end),
+                                                          target_ptr_size_,
+                                                          compile_pic_);
 }
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
@@ -1090,6 +1273,7 @@
     NativeObjectRelocation& relocation = pair.second;
     auto* dest = image_->Begin() + relocation.offset;
     DCHECK_GE(dest, image_->Begin() + image_end_);
+    DCHECK(!IsInBootImage(pair.first));
     switch (relocation.type) {
       case kNativeObjectRelocationTypeArtField: {
         memcpy(dest, pair.first, sizeof(ArtField));
@@ -1126,16 +1310,18 @@
   auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   const ImageSection& methods_section = image_header->GetMethodsSection();
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
-    auto* m = image_methods_[i];
-    CHECK(m != nullptr);
-    auto it = native_object_relocations_.find(m);
-    CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(m);
-    NativeObjectRelocation& relocation = it->second;
-    CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
-        << methods_section;
-    CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
-    auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
-    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest);
+    ArtMethod* method = image_methods_[i];
+    CHECK(method != nullptr);
+    if (!IsInBootImage(method)) {
+      auto it = native_object_relocations_.find(method);
+      CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method);
+      NativeObjectRelocation& relocation = it->second;
+      CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
+          << methods_section;
+      CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
+      method = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
+    }
+    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
   // Write the intern table into the image.
   const ImageSection& intern_table_section = image_header->GetImageSection(
@@ -1183,8 +1369,8 @@
   dst->SetClass(GetImageAddress(arr->GetClass()));
   auto* dest_array = down_cast<mirror::PointerArray*>(dst);
   for (size_t i = 0, count = num_elements; i < count; ++i) {
-    auto* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
-    if (elem != nullptr) {
+    void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
+    if (elem != nullptr && !IsInBootImage(elem)) {
       auto it = native_object_relocations_.find(elem);
       if (UNLIKELY(it == native_object_relocations_.end())) {
         if (it->second.IsArtMethodRelocation()) {
@@ -1209,6 +1395,9 @@
 }
 
 void ImageWriter::CopyAndFixupObject(Object* obj) {
+  if (IsInBootImage(obj)) {
+    return;
+  }
   size_t offset = GetImageOffset(obj);
   auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
   DCHECK_LT(offset, image_end_);
@@ -1247,14 +1436,16 @@
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
     // image.
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        offset, image_writer_->GetImageAddress(ref));
+        offset,
+        image_writer_->GetImageAddress(ref));
   }
 
   // java.lang.ref.Reference visitor.
   void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        mirror::Reference::ReferentOffset(), image_writer_->GetImageAddress(ref->GetReferent()));
+        mirror::Reference::ReferentOffset(),
+        image_writer_->GetImageAddress(ref->GetReferent()));
   }
 
  protected:
@@ -1282,44 +1473,43 @@
 
 uintptr_t ImageWriter::NativeOffsetInImage(void* obj) {
   DCHECK(obj != nullptr);
+  DCHECK(!IsInBootImage(obj));
   auto it = native_object_relocations_.find(obj);
-  CHECK(it != native_object_relocations_.end()) << obj;
+  CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+      << Runtime::Current()->GetHeap()->DumpSpaces();
   const NativeObjectRelocation& relocation = it->second;
   return relocation.offset;
 }
 
 template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
-  if (obj == nullptr) {
-    return nullptr;
-  }
-  return reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
+  return (obj == nullptr || IsInBootImage(obj))
+      ? obj
+      : reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
 }
 
-void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
-  // Update the field arrays.
-  copy->SetSFieldsPtrUnchecked(NativeLocationInImage(orig->GetSFieldsPtr()));
-  copy->SetIFieldsPtrUnchecked(NativeLocationInImage(orig->GetIFieldsPtr()));
-  // Update direct and virtual method arrays.
-  copy->SetDirectMethodsPtrUnchecked(NativeLocationInImage(orig->GetDirectMethodsPtr()));
-  copy->SetVirtualMethodsPtr(NativeLocationInImage(orig->GetVirtualMethodsPtr()));
-  // Update dex cache strings.
-  copy->SetDexCacheStrings(NativeLocationInImage(orig->GetDexCacheStrings()));
-  // Fix up embedded tables.
-  if (orig->ShouldHaveEmbeddedImtAndVTable()) {
-    for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedVTableEntryUnchecked(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
-    }
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedImTableEntry(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
-    }
+template <typename T>
+T* ImageWriter::NativeCopyLocation(T* obj) {
+  return (obj == nullptr || IsInBootImage(obj))
+      ? obj
+      : reinterpret_cast<T*>(image_->Begin() + NativeOffsetInImage(obj));
+}
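These two helpers apply one NativeOffsetInImage() result to two different bases: NativeLocationInImage produces the pointer value that will be valid at runtime (image_begin_ + offset, the value stored into the image), while NativeCopyLocation produces the address to write through right now (image_->Begin() + offset, inside the writer's own mapping). Illustrative numbers only:

#include <cstdint>

// One relocation offset, two bases.
struct RelocationExample {
  uintptr_t offset = 0x1000;          // NativeOffsetInImage(obj)
  uintptr_t image_begin = 0x70000000; // Target base baked into stored pointers.
  uintptr_t writer_buffer = 0x40000000;  // The MemMap the writer fills in.

  uintptr_t RuntimeAddress() const { return image_begin + offset; }   // Stored in the image.
  uintptr_t WriteAddress() const { return writer_buffer + offset; }   // memcpy destination.
};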
+
+class NativeLocationVisitor {
+ public:
+  explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const {
+    return image_writer_->NativeLocationInImage(ptr);
   }
+
+ private:
+  ImageWriter* const image_writer_;
+};
+
+void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
+  orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
 }
@@ -1379,6 +1569,21 @@
   }
 }
 
+
+class ImageAddressVisitor {
+ public:
+  explicit ImageAddressVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return image_writer_->GetImageAddress(ptr);
+  }
+
+ private:
+  ImageWriter* const image_writer_;
+};
+
+
 void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
                                 mirror::DexCache* copy_dex_cache) {
   // Though the DexCache array fields are usually treated as native pointers, we set the full
   // 64-bit values here, zero-extending 32-bit target pointers (i.e.
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
   //     static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))).
   GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings();
   if (orig_strings != nullptr) {
-    uintptr_t copy_strings_offset = NativeOffsetInImage(orig_strings);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::StringsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_strings_offset)));
-    GcRoot<mirror::String>* copy_strings =
-        reinterpret_cast<GcRoot<mirror::String>*>(image_->Begin() + copy_strings_offset);
-    for (size_t i = 0, num = orig_dex_cache->NumStrings(); i != num; ++i) {
-      copy_strings[i] = GcRoot<mirror::String>(GetImageAddress(orig_strings[i].Read()));
-    }
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
+                                               NativeLocationInImage(orig_strings),
+                                               /*pointer size*/8u);
+    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings), ImageAddressVisitor(this));
   }
   GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
-    uintptr_t copy_types_offset = NativeOffsetInImage(orig_types);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedTypesOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_types_offset)));
-    GcRoot<mirror::Class>* copy_types =
-        reinterpret_cast<GcRoot<mirror::Class>*>(image_->Begin() + copy_types_offset);
-    for (size_t i = 0, num = orig_dex_cache->NumResolvedTypes(); i != num; ++i) {
-      copy_types[i] = GcRoot<mirror::Class>(GetImageAddress(orig_types[i].Read()));
-    }
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(),
+                                               NativeLocationInImage(orig_types),
+                                               /*pointer size*/8u);
+    orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types), ImageAddressVisitor(this));
   }
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
-    uintptr_t copy_methods_offset = NativeOffsetInImage(orig_methods);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedMethodsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_methods_offset)));
-    ArtMethod** copy_methods =
-        reinterpret_cast<ArtMethod**>(image_->Begin() + copy_methods_offset);
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(),
+                                               NativeLocationInImage(orig_methods),
+                                               /*pointer size*/8u);
+    ArtMethod** copy_methods = NativeCopyLocation(orig_methods);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
       ArtMethod* copy = NativeLocationInImage(orig);
@@ -1425,11 +1618,10 @@
   }
   ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
-    uintptr_t copy_fields_offset = NativeOffsetInImage(orig_fields);
-    copy_dex_cache->SetField64<false>(
-        mirror::DexCache::ResolvedFieldsOffset(),
-        static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + copy_fields_offset)));
-    ArtField** copy_fields = reinterpret_cast<ArtField**>(image_->Begin() + copy_fields_offset);
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
+                                               NativeLocationInImage(orig_fields),
+                                               /*pointer size*/8u);
+    ArtField** copy_fields = NativeCopyLocation(orig_fields);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
       ArtField* copy = NativeLocationInImage(orig);
@@ -1438,9 +1630,45 @@
   }
 }
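
(The comment opening FixupDexCache deserves a concrete restatement: the dex cache array fields occupy a 64-bit slot even on 32-bit targets, so a 32-bit pointer value is zero-extended into the field, matching the /*pointer size*/8u stores above. A self-contained sketch in plain C++:)

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint8_t field[8] = {};           // the 64-bit field slot in the object
      uint32_t ptr32 = 0x12345678u;    // a 32-bit target pointer value
      uint64_t widened = ptr32;        // zero-extend into the full-width field
      std::memcpy(field, &widened, sizeof(widened));
      std::printf("%#llx\n", static_cast<unsigned long long>(widened));  // 0x12345678
    }
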
 
+const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
+  DCHECK_LT(type, kOatAddressCount);
+  // If we are compiling an app image, we need to use the stubs of the boot image.
+  if (compile_app_image_) {
+    // Use the current image pointers.
+    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
+    DCHECK(image_space != nullptr);
+    const OatFile* oat_file = image_space->GetOatFile();
+    CHECK(oat_file != nullptr);
+    const OatHeader& header = oat_file->GetOatHeader();
+    switch (type) {
+      // TODO: We could maybe clean this up if we stored them in an array in the oat header.
+      case kOatAddressQuickGenericJNITrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickGenericJniTrampoline());
+      case kOatAddressInterpreterToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToInterpreterBridge());
+      case kOatAddressInterpreterToCompiledCodeBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToCompiledCodeBridge());
+      case kOatAddressJNIDlsymLookup:
+        return static_cast<const uint8_t*>(header.GetJniDlsymLookup());
+      case kOatAddressQuickIMTConflictTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickImtConflictTrampoline());
+      case kOatAddressQuickResolutionTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickResolutionTrampoline());
+      case kOatAddressQuickToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetQuickToInterpreterBridge());
+      default:
+        UNREACHABLE();
+    }
+  }
+  return GetOatAddressForOffset(oat_address_offsets_[type]);
+}
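
(The non-app-image path reduces to a table lookup: an enum-indexed array of offsets from the oat data start. A standalone sketch of that shape; the enum names and values here are invented:)

    #include <cstdint>
    #include <cstdio>

    enum OatAddress { kTrampolineA, kTrampolineB, kOatAddressCount };

    // Stand-ins for oat_data_begin_ and oat_address_offsets_[].
    const uint8_t* const kOatDataBegin = reinterpret_cast<const uint8_t*>(0x71000000);
    uint32_t offsets[kOatAddressCount] = {0x100, 0x180};

    const uint8_t* GetOatAddressForOffset(uint32_t offset) { return kOatDataBegin + offset; }
    const uint8_t* GetOatAddress(OatAddress type) { return GetOatAddressForOffset(offsets[type]); }

    int main() {
      std::printf("%p\n", static_cast<const void*>(GetOatAddress(kTrampolineB)));
    }
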
+
 const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) {
-  DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() &&
-         !method->IsImtUnimplementedMethod() && !method->IsAbstract()) << PrettyMethod(method);
+  DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsImtConflictMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method);
+  DCHECK(method->IsInvokable()) << PrettyMethod(method);
+  DCHECK(!IsInBootImage(method)) << PrettyMethod(method);
 
   // Use original code if it exists. Otherwise, set the code pointer to the resolution
   // trampoline.
@@ -1448,27 +1676,26 @@
   // Quick entrypoint:
   uint32_t quick_oat_code_offset = PointerToLowMemUInt32(
       method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_));
-  const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset);
+  const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset);
   *quick_is_interpreted = false;
   if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() ||
       method->GetDeclaringClass()->IsInitialized())) {
     // We have code for a non-static or initialized method, just use the code.
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else if (quick_code == nullptr && method->IsNative() &&
       (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
     // Non-static or initialized native method missing compiled code, use generic JNI version.
-    quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
-    DCHECK_GE(quick_code, oat_data_begin_);
+    quick_code = GetOatAddress(kOatAddressQuickGenericJNITrampoline);
   } else if (quick_code == nullptr && !method->IsNative()) {
     // We don't have code at all for a non-native method, use the interpreter.
-    quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+    quick_code = GetOatAddress(kOatAddressQuickToInterpreterBridge);
     *quick_is_interpreted = true;
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else {
     CHECK(!method->GetDeclaringClass()->IsInitialized());
     // We have code for a static method, but need to go through the resolution stub for class
     // initialization.
-    quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
+    quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline);
+  }
+  if (!IsInBootOatFile(quick_code)) {
     DCHECK_GE(quick_code, oat_data_begin_);
   }
   return quick_code;
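
(The entrypoint choice above reads as a small decision table. A condensed standalone restatement; the booleans approximate the ArtMethod/Class queries and the final CHECK is elided:)

    #include <cstdio>

    enum Entry { kCompiledCode, kGenericJniTrampoline,
                 kQuickToInterpreterBridge, kQuickResolutionTrampoline };

    // needs_init approximates "static, non-constructor, class not initialized".
    Entry SelectQuickEntry(bool has_code, bool is_native, bool needs_init) {
      if (has_code && !needs_init) return kCompiledCode;
      if (!has_code && is_native && !needs_init) return kGenericJniTrampoline;
      if (!has_code && !is_native) return kQuickToInterpreterBridge;  // interpreted
      return kQuickResolutionTrampoline;  // resolve/initialize at first call
    }

    int main() {
      // Compiled code exists, but the class still needs initialization.
      std::printf("%d\n", SelectQuickEntry(true, false, true) == kQuickResolutionTrampoline);
    }
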
@@ -1479,16 +1706,16 @@
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(method == runtime->GetResolutionMethod())) {
-    return GetOatAddress(quick_resolution_trampoline_offset_);
+    return GetOatAddress(kOatAddressQuickResolutionTrampoline);
   } else if (UNLIKELY(method == runtime->GetImtConflictMethod() ||
                       method == runtime->GetImtUnimplementedMethod())) {
-    return GetOatAddress(quick_imt_conflict_trampoline_offset_);
+    return GetOatAddress(kOatAddressQuickIMTConflictTrampoline);
   } else {
    // We assume all methods have code. If they don't currently then we set them to use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
-    if (UNLIKELY(method->IsAbstract())) {
-      return GetOatAddress(quick_to_interpreter_bridge_offset_);
+    if (UNLIKELY(!method->IsInvokable())) {
+      return GetOatAddress(kOatAddressQuickToInterpreterBridge);
     } else {
       bool quick_is_interpreted;
       return GetQuickCode(method, &quick_is_interpreted);
@@ -1513,11 +1740,11 @@
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
     copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_);
+        GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
   } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
                       orig == runtime->GetImtUnimplementedMethod())) {
     copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_);
+        GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
   } else if (UNLIKELY(orig->IsRuntimeMethod())) {
     bool found_one = false;
     for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
@@ -1533,9 +1760,9 @@
     // We assume all methods have code. If they don't currently then we set them to use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
-    if (UNLIKELY(orig->IsAbstract())) {
+    if (UNLIKELY(!orig->IsInvokable())) {
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
+          GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
       const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
@@ -1546,7 +1773,7 @@
         // The native method's pointer is set to a stub that performs the lookup via dlsym.
         // Note this is not the code_ pointer; that is handled above.
         copy->SetEntryPointFromJniPtrSize(
-            GetOatAddress(jni_dlsym_lookup_offset_), target_ptr_size_);
+            GetOatAddress(kOatAddressJNIDlsymLookup), target_ptr_size_);
       }
     }
   }
@@ -1563,8 +1790,10 @@
 
 void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) {
   std::string error_msg;
-  std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file, PROT_READ|PROT_WRITE,
-                                             MAP_SHARED, &error_msg));
+  std::unique_ptr<ElfFile> elf(ElfFile::Open(elf_file,
+                                             PROT_READ | PROT_WRITE,
+                                             MAP_SHARED,
+                                             &error_msg));
   if (elf.get() == nullptr) {
     LOG(FATAL) << "Unable open oat file: " << error_msg;
     return;
@@ -1607,10 +1836,11 @@
 
 uint8_t* ImageWriter::GetOatFileBegin() const {
   DCHECK_GT(intern_table_bytes_, 0u);
-  size_t native_sections_size =
-      bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] +
-      bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinDexCacheArray] +
-      intern_table_bytes_;
+  size_t native_sections_size = bin_slot_sizes_[kBinArtField] +
+                                bin_slot_sizes_[kBinArtMethodDirty] +
+                                bin_slot_sizes_[kBinArtMethodClean] +
+                                bin_slot_sizes_[kBinDexCacheArray] +
+                                intern_table_bytes_;
   return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
 }
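
(The layout computed above is simply: image base, then the page-rounded sum of the mirror objects and the native sections, then the oat file. A worked example with invented sizes:)

    #include <cstdint>
    #include <cstdio>

    constexpr uintptr_t kPageSize = 4096;
    constexpr uintptr_t RoundUp(uintptr_t x, uintptr_t n) { return (x + n - 1) / n * n; }

    int main() {
      uintptr_t image_begin = 0x70000000;  // invented
      uintptr_t image_end = 0x5430;        // bytes of mirror objects written
      uintptr_t native_sections = 0x800    // ArtFields
                                + 0x400    // dirty ArtMethods
                                + 0xC00    // clean ArtMethods
                                + 0x1000   // dex cache arrays
                                + 0x200;   // intern table
      uintptr_t oat_begin = image_begin + RoundUp(image_end + native_sections, kPageSize);
      std::printf("0x%zx\n", static_cast<size_t>(oat_begin));  // 0x70008000
    }
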
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 7a2febc..889cd10 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -40,27 +40,42 @@
 #include "utils.h"
 
 namespace art {
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
 
 static constexpr int kInvalidImageFd = -1;
 
 // Write a Space built during compilation for use during execution.
 class ImageWriter FINAL {
  public:
-  ImageWriter(const CompilerDriver& compiler_driver, uintptr_t image_begin,
-              bool compile_pic)
-      : compiler_driver_(compiler_driver), image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
-        image_end_(0), image_objects_offset_begin_(0), image_roots_address_(0), oat_file_(nullptr),
-        oat_data_begin_(nullptr), interpreter_to_interpreter_bridge_offset_(0),
-        interpreter_to_compiled_code_bridge_offset_(0), jni_dlsym_lookup_offset_(0),
-        quick_generic_jni_trampoline_offset_(0),
-        quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
-        quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
+  ImageWriter(const CompilerDriver& compiler_driver,
+              uintptr_t image_begin,
+              bool compile_pic,
+              bool compile_app_image)
+      : compiler_driver_(compiler_driver),
+        image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+        image_end_(0),
+        image_objects_offset_begin_(0),
+        image_roots_address_(0),
+        oat_file_(nullptr),
+        oat_data_begin_(nullptr),
+        compile_pic_(compile_pic),
+        compile_app_image_(compile_app_image),
+        boot_image_space_(nullptr),
         target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
-        bin_slot_sizes_(), bin_slot_offsets_(), bin_slot_count_(),
-        intern_table_bytes_(0u), image_method_array_(ImageHeader::kImageMethodsCount),
-        dirty_methods_(0u), clean_methods_(0u) {
+        bin_slot_sizes_(),
+        bin_slot_offsets_(),
+        bin_slot_count_(),
+        intern_table_bytes_(0u),
+        image_method_array_(ImageHeader::kImageMethodsCount),
+        dirty_methods_(0u),
+        clean_methods_(0u) {
     CHECK_NE(image_begin, 0U);
-    std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr);
+    std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+    std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
   }
 
   ~ImageWriter() {
@@ -74,8 +89,9 @@
 
   template <typename T>
   T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return object == nullptr ? nullptr :
-        reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
+    return (object == nullptr || IsInBootImage(object))
+        ? object
+        : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
   }
 
   ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -150,6 +166,19 @@
   };
   friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type);
 
+  enum OatAddress {
+    kOatAddressInterpreterToInterpreterBridge,
+    kOatAddressInterpreterToCompiledCodeBridge,
+    kOatAddressJNIDlsymLookup,
+    kOatAddressQuickGenericJNITrampoline,
+    kOatAddressQuickIMTConflictTrampoline,
+    kOatAddressQuickResolutionTrampoline,
+    kOatAddressQuickToInterpreterBridge,
+    // Number of elements in the enum.
+    kOatAddressCount,
+  };
+  friend std::ostream& operator<<(std::ostream& stream, const OatAddress& oat_address);
+
   static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1);
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
@@ -215,7 +244,10 @@
     return reinterpret_cast<mirror::Object*>(dst);
   }
 
-  const uint8_t* GetOatAddress(uint32_t offset) const {
+  // Returns the address in the boot image if we are compiling the app image.
+  const uint8_t* GetOatAddress(OatAddress type) const;
+
+  const uint8_t* GetOatAddressForOffset(uint32_t offset) const {
     // With Quick, code is within the OatFile, as they are all in one
     // .o ELF object.
     DCHECK_LE(offset, oat_file_->Size());
@@ -224,7 +256,7 @@
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool IsImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  bool KeepClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Debug aid that lists the requested image classes.
   void DumpImageClasses();
@@ -276,8 +308,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupDexCache(mirror::DexCache* orig_dex_cache, mirror::DexCache* copy_dex_cache)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr, mirror::Class* klass,
-                         Bin array_type) SHARED_REQUIRES(Locks::mutator_lock_);
+  void FixupPointerArray(mirror::Object* dst,
+                         mirror::PointerArray* arr,
+                         mirror::Class* klass,
+                         Bin array_type)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Get quick code for non-resolution/imt_conflict/abstract method.
   const uint8_t* GetQuickCode(ArtMethod* method, bool* quick_is_interpreted)
@@ -299,13 +334,40 @@
   void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Return true if klass is loaded by the boot class loader but not in the boot image.
+  bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Return true if klass depends on a boot class loader non image class. We want to prune
+  // these classes since we do not want any boot class loader classes in the image. This means that
+  // we also cannot have any classes which refer to these boot class loader non image classes.
+  bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // early_exit is true if we had a cyclic dependency anywhere down the chain.
+  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
+                                                    bool* early_exit,
+                                                    std::unordered_set<mirror::Class*>* visited)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
   uintptr_t NativeOffsetInImage(void* obj);
 
+  // Location where the object will be when the image is loaded at runtime.
   template <typename T>
   T* NativeLocationInImage(T* obj);
 
+  // Location where the writer's temporary copy of the object currently lives.
+  template <typename T>
+  T* NativeCopyLocation(T* obj);
+
+  // Return true if obj is inside the boot image space. This may only return true when we are
+  // compiling an app image.
+  bool IsInBootImage(const void* obj) const;
+
+  // Return true if ptr is within the boot oat file.
+  bool IsInBootOatFile(const void* ptr) const;
+
   const CompilerDriver& compiler_driver_;
 
   // Beginning target image address for the output image.
@@ -344,14 +406,14 @@
   std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offset from oat_data_begin_ to the stubs.
-  uint32_t interpreter_to_interpreter_bridge_offset_;
-  uint32_t interpreter_to_compiled_code_bridge_offset_;
-  uint32_t jni_dlsym_lookup_offset_;
-  uint32_t quick_generic_jni_trampoline_offset_;
-  uint32_t quick_imt_conflict_trampoline_offset_;
-  uint32_t quick_resolution_trampoline_offset_;
-  uint32_t quick_to_interpreter_bridge_offset_;
+  uint32_t oat_address_offsets_[kOatAddressCount];
+
+  // Boolean flags.
   const bool compile_pic_;
+  const bool compile_app_image_;
+
+  // Cache the boot image space in this class for faster lookups.
+  gc::space::ImageSpace* boot_image_space_;
 
   // Size of pointers on the target architecture.
   size_t target_ptr_size_;
@@ -388,9 +450,14 @@
   uint64_t dirty_methods_;
   uint64_t clean_methods_;
 
+  // Memoization table used by class pruning to speed up ContainsBootClassLoaderNonImageClass.
+  std::unordered_map<mirror::Class*, bool> prune_class_memo_;
+
+  friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
   friend class FixupVisitor;
+  friend class NativeLocationVisitor;
   friend class NonImageClassesVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
 };
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d520208..2125c9a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -177,7 +177,8 @@
   }
 
   // Don't compile the method if we are supposed to be deoptimized.
-  if (runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
+  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
     return false;
   }
 
@@ -189,13 +190,14 @@
   }
 
   // Do the compilation.
-  CompiledMethod* compiled_method = nullptr;
+  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
+  bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
-    compiled_method = compiler_driver_->CompileArtMethod(self, method_to_compile);
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
   // Trim maps to reduce memory usage.
@@ -205,105 +207,14 @@
     runtime->GetArenaPool()->TrimMaps();
   }
 
-  // Check if we failed compiling.
-  if (compiled_method == nullptr) {
-    return false;
-  }
-
   total_time_ += NanoTime() - start_time;
-  bool result = false;
-  const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method);
-
-  if (code != nullptr) {
-    // Already have some compiled code, just use this instead of linking.
-    // TODO: Fix recompilation.
-    method->SetEntryPointFromQuickCompiledCode(code);
-    result = true;
-  } else {
-    TimingLogger::ScopedTiming t2("LinkCode", &logger);
-    if (AddToCodeCache(method, compiled_method)) {
-      result = true;
-    }
-  }
-
-  // Remove the compiled method to save memory.
-  compiler_driver_->RemoveCompiledMethod(
-      MethodReference(h_class->GetDexCache()->GetDexFile(), method->GetDexMethodIndex()));
   runtime->GetJit()->AddTimingLogger(logger);
-  return result;
+  return success;
 }
 
 CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
   return callbacks_.get();
 }
 
-bool JitCompiler::AddToCodeCache(ArtMethod* method,
-                                 const CompiledMethod* compiled_method) {
-  Runtime* runtime = Runtime::Current();
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-  auto const quick_code = compiled_method->GetQuickCode();
-  if (quick_code.empty()) {
-    return false;
-  }
-  const auto code_size = quick_code.size();
-  Thread* const self = Thread::Current();
-  auto const mapping_table = compiled_method->GetMappingTable();
-  auto const vmap_table = compiled_method->GetVmapTable();
-  auto const gc_map = compiled_method->GetGcMap();
-  uint8_t* mapping_table_ptr = nullptr;
-  uint8_t* vmap_table_ptr = nullptr;
-  uint8_t* gc_map_ptr = nullptr;
-
-  if (!mapping_table.empty()) {
-    // Write out pre-header stuff.
-    mapping_table_ptr = code_cache->AddDataArray(
-        self, mapping_table.data(), mapping_table.data() + mapping_table.size());
-    if (mapping_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (!vmap_table.empty()) {
-    vmap_table_ptr = code_cache->AddDataArray(
-        self, vmap_table.data(), vmap_table.data() + vmap_table.size());
-    if (vmap_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (!gc_map.empty()) {
-    gc_map_ptr = code_cache->AddDataArray(
-        self, gc_map.data(), gc_map.data() + gc_map.size());
-    if (gc_map_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  uint8_t* const code = code_cache->CommitCode(self,
-                                               method,
-                                               mapping_table_ptr,
-                                               vmap_table_ptr,
-                                               gc_map_ptr,
-                                               compiled_method->GetFrameSizeInBytes(),
-                                               compiled_method->GetCoreSpillMask(),
-                                               compiled_method->GetFpSpillMask(),
-                                               compiled_method->GetQuickCode().data(),
-                                               compiled_method->GetQuickCode().size());
-
-  if (code == nullptr) {
-    return false;
-  }
-
-  const size_t thumb_offset = compiled_method->CodeDelta();
-  const uint32_t code_offset = sizeof(OatQuickMethodHeader) + thumb_offset;
-  VLOG(jit)
-      << "JIT added "
-      << PrettyMethod(method) << "@" << method
-      << " ccache_size=" << PrettySize(code_cache->CodeCacheSize()) << ": "
-      << reinterpret_cast<void*>(code + code_offset)
-      << "," << reinterpret_cast<void*>(code + code_offset + code_size);
-  return true;
-}
-
 }  // namespace jit
 }  // namespace art
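
(In shape, the deleted AddToCodeCache helper was a reserve-then-commit protocol: reserve every ancillary table in the data cache, bail out on any failure, and only then commit the machine code; JitCompile now drives the real JitCodeCache directly. A stripped-down sketch with hypothetical types, not the JitCodeCache API:)

    #include <cstdint>
    #include <vector>

    struct Cache {                       // hypothetical two-region cache
      std::vector<uint8_t> data, code;
      bool AddData(const std::vector<uint8_t>& v) {
        data.insert(data.end(), v.begin(), v.end());
        return true;                     // a real cache could fail here (out of space)
      }
    };

    // Reserve side tables first; only commit code once they all succeeded.
    bool AddToCache(Cache& c, const std::vector<uint8_t>& maps,
                    const std::vector<uint8_t>& quick_code) {
      if (quick_code.empty()) return false;
      if (!maps.empty() && !c.AddData(maps)) return false;  // out of data cache
      c.code.insert(c.code.end(), quick_code.begin(), quick_code.end());
      return true;
    }

    int main() { Cache c; return AddToCache(c, {1, 2}, {0x90}) ? 0 : 1; }
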
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 0bfe8a2..8832c84 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -51,6 +51,7 @@
 
     // Assemble the method.
     std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+    jni_asm->cfi().SetEnabled(true);
     jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
                         callee_save_regs, mr_conv->EntrySpills());
     jni_asm->IncreaseFrameSize(32);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index ea3cb66..c305b12 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -16,6 +16,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiled_method.h"
@@ -26,6 +27,9 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
@@ -37,6 +41,16 @@
 
 namespace art {
 
+NO_RETURN static void Usage(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  LOG(FATAL) << error;
+  va_end(ap);
+  UNREACHABLE();
+}
+
 class OatTest : public CommonCompilerTest {
  protected:
   static const bool kCompile = false;  // DISABLED_ due to the time to compile libcore
@@ -71,6 +85,92 @@
       CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size));
     }
   }
+
+  void SetupCompiler(Compiler::Kind compiler_kind,
+                     InstructionSet insn_set,
+                     const std::vector<std::string>& compiler_options,
+                     /*out*/std::string* error_msg) {
+    ASSERT_TRUE(error_msg != nullptr);
+    insn_features_.reset(InstructionSetFeatures::FromVariant(insn_set, "default", error_msg));
+    ASSERT_TRUE(insn_features_ != nullptr) << *error_msg;
+    compiler_options_.reset(new CompilerOptions);
+    for (const std::string& option : compiler_options) {
+      compiler_options_->ParseCompilerOption(option, Usage);
+    }
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
+    method_inliner_map_.reset(new DexFileToMethodInlinerMap);
+    callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
+                                                method_inliner_map_.get(),
+                                                CompilerCallbacks::CallbackMode::kCompileApp));
+    Runtime::Current()->SetCompilerCallbacks(callbacks_.get());
+    timer_.reset(new CumulativeLogger("Compilation times"));
+    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                              verification_results_.get(),
+                                              method_inliner_map_.get(),
+                                              compiler_kind,
+                                              insn_set,
+                                              insn_features_.get(),
+                                              false,
+                                              nullptr,
+                                              nullptr,
+                                              nullptr,
+                                              2,
+                                              true,
+                                              true,
+                                              "",
+                                              false,
+                                              timer_.get(),
+                                              -1,
+                                              ""));
+  }
+
+  bool WriteElf(File* file,
+                const std::vector<const DexFile*>& dex_files,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(dex_files,
+                         42U,
+                         4096U,
+                         0,
+                         compiler_driver_.get(),
+                         nullptr,
+                         /*compiling_boot_image*/false,
+                         &timings,
+                         &key_value_store);
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+
+    elf_writer->Start();
+
+    OutputStream* rodata = elf_writer->StartRoData();
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+    elf_writer->WriteDynamicSection();
+
+    ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+    elf_writer->WriteDebugInfo(method_infos);
+
+    ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+    elf_writer->WritePatchLocations(patch_locations);
+
+    return elf_writer->End();
+  }
+
+  std::unique_ptr<const InstructionSetFeatures> insn_features_;
+  std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
 TEST_F(OatTest, WriteRead) {
@@ -80,21 +180,9 @@
   // TODO: make selectable.
   Compiler::Kind compiler_kind = Compiler::kQuick;
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
-
   std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> insn_features(
-      InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-  compiler_options_.reset(new CompilerOptions);
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
-  method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  timer_.reset(new CumulativeLogger("Compilation times"));
-  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                            verification_results_.get(),
-                                            method_inliner_map_.get(),
-                                            compiler_kind, insn_set,
-                                            insn_features.get(), false, nullptr, nullptr, nullptr,
-                                            2, true, true, "", false, timer_.get(), -1, ""));
+  SetupCompiler(compiler_kind, insn_set, std::vector<std::string>(), /*out*/ &error_msg);
+
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
@@ -105,19 +193,7 @@
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
-  OatWriter oat_writer(class_linker->GetBootClassPath(),
-                       42U,
-                       4096U,
-                       0,
-                       compiler_driver_.get(),
-                       nullptr,
-                       &timings,
-                       &key_value_store);
-  bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                            !kIsTargetBuild,
-                                            class_linker->GetBootClassPath(),
-                                            &oat_writer,
-                                            tmp.GetFile());
+  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store);
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
@@ -184,7 +260,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(113 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(114 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
@@ -212,4 +288,53 @@
     ASSERT_FALSE(oat_header->IsValid());
 }
 
+TEST_F(OatTest, EmptyTextSection) {
+  TimingLogger timings("OatTest::EmptyTextSection", false, false);
+
+  // TODO: make selectable.
+  Compiler::Kind compiler_kind = Compiler::kQuick;
+  InstructionSet insn_set = kRuntimeISA;
+  if (insn_set == kArm) insn_set = kThumb2;
+  std::string error_msg;
+  std::vector<std::string> compiler_options;
+  compiler_options.push_back("--compiler-filter=verify-at-runtime");
+  SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg);
+
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_loader = LoadDex("Main");
+  }
+  ASSERT_TRUE(class_loader != nullptr);
+  std::vector<const DexFile*> dex_files = GetDexFiles(class_loader);
+  ASSERT_TRUE(!dex_files.empty());
+
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  for (const DexFile* dex_file : dex_files) {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->RegisterDexFile(
+        *dex_file,
+        class_linker->GetOrCreateAllocatorForClassLoader(
+            soa.Decode<mirror::ClassLoader*>(class_loader)));
+  }
+  compiler_driver_->SetDexFilesForOatFile(dex_files);
+  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+
+  ScratchFile tmp;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
+                                                  tmp.GetFilename(),
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  nullptr,
+                                                  &error_msg));
+  ASSERT_TRUE(oat_file != nullptr);
+  EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c7b8884..a6a49f9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,6 +31,7 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
@@ -65,10 +66,12 @@
                      int32_t image_patch_delta,
                      const CompilerDriver* compiler,
                      ImageWriter* image_writer,
+                     bool compiling_boot_image,
                      TimingLogger* timings,
                      SafeMap<std::string, std::string>* key_value_store)
   : compiler_driver_(compiler),
     image_writer_(image_writer),
+    compiling_boot_image_(compiling_boot_image),
     dex_files_(&dex_files),
     size_(0u),
     bss_size_(0u),
@@ -113,7 +116,9 @@
     size_oat_lookup_table_(0),
     method_offset_map_() {
   CHECK(key_value_store != nullptr);
-
+  if (compiling_boot_image) {
+    CHECK(image_writer != nullptr);
+  }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
@@ -154,7 +159,7 @@
   }
   size_ = offset;
 
-  if (!HasImage()) {
+  if (!HasBootImage()) {
     // Allocate space for app dex cache arrays in the .bss section.
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
@@ -167,9 +172,10 @@
   }
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
-  CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr);
-  CHECK_EQ(compiler->IsImage(),
-           key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+  if (compiling_boot_image_) {
+    CHECK_EQ(image_writer_ != nullptr,
+             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+  }
   CHECK_ALIGNED(image_patch_delta_, kPageSize);
 }
 
@@ -480,7 +486,7 @@
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
+        writer_->method_info_.push_back(dwarf::MethodDebugInfo {
             dex_file_,
             class_def_index_,
             it.GetMemberIndex(),
@@ -672,7 +678,7 @@
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
     patched_code_.reserve(16 * KB);
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       // If we're creating the image, the address space must be ready so that we can apply patches.
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
     }
@@ -855,7 +861,7 @@
   }
 
   uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>(
               patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
       const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
@@ -868,7 +874,7 @@
 
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       object = writer_->image_writer_->GetImageAddress(object);
     } else {
       // NOTE: We're using linker patches for app->boot references when the image can
@@ -888,13 +894,13 @@
 
   void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       method = writer_->image_writer_->GetImageMethodAddress(method);
     } else if (kIsDebugBuild) {
       // NOTE: We're using linker patches for app->boot references when the image can
       // be relocated and therefore we need to emit .oat_patches. We're not using this
       // for app->app references, so check that the method is an image method.
-      gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+      gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
       size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
       CHECK(image_space->GetImageHeader().GetMethodsSection().Contains(method_offset));
     }
@@ -911,7 +917,7 @@
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t address = target_offset;
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
                                       writer_->oat_data_offset_ + target_offset);
     }
@@ -1123,7 +1129,7 @@
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
@@ -1164,7 +1170,7 @@
     } while (false)
 
   VISIT(InitCodeMethodVisitor);
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     VISIT(InitImageMethodVisitor);
   }
 
@@ -1408,7 +1414,7 @@
 }
 
 size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field) \
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index f2fe048..6c46ebc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -38,6 +38,10 @@
 class TimingLogger;
 class TypeLookupTable;
 
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
+
 // OatHeader         variable length with count of D OatDexFiles
 //
 // OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
@@ -93,6 +97,7 @@
             int32_t image_patch_delta,
             const CompilerDriver* compiler,
             ImageWriter* image_writer,
+            bool compiling_boot_image,
             TimingLogger* timings,
             SafeMap<std::string, std::string>* key_value_store);
 
@@ -103,6 +108,10 @@
     return image_writer_ != nullptr;
   }
 
+  bool HasBootImage() const {
+    return compiling_boot_image_;
+  }
+
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
   }
@@ -124,19 +133,7 @@
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
-
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
+  const std::vector<dwarf::MethodDebugInfo>& GetMethodDebugInfo() const {
     return method_info_;
   }
 
@@ -275,10 +272,11 @@
     DISALLOW_COPY_AND_ASSIGN(OatClass);
   };
 
-  std::vector<DebugInfo> method_info_;
+  std::vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
+  const bool compiling_boot_image_;
 
   // Note: OatFile does not take ownership of the DexFiles.
   const std::vector<const DexFile*>* dex_files_;
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index f985745..f0cafc8 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -61,40 +61,6 @@
       && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
 }
 
-// Returns an instruction with the opposite boolean value from 'cond'.
-static HInstruction* GetOppositeCondition(HInstruction* cond) {
-  HGraph* graph = cond->GetBlock()->GetGraph();
-  ArenaAllocator* allocator = graph->GetArena();
-
-  if (cond->IsCondition()) {
-    HInstruction* lhs = cond->InputAt(0);
-    HInstruction* rhs = cond->InputAt(1);
-    switch (cond->AsCondition()->GetOppositeCondition()) {  // get *opposite*
-      case kCondEQ: return new (allocator) HEqual(lhs, rhs);
-      case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
-      case kCondLT: return new (allocator) HLessThan(lhs, rhs);
-      case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
-      case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
-      case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
-      case kCondB:  return new (allocator) HBelow(lhs, rhs);
-      case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
-      case kCondA:  return new (allocator) HAbove(lhs, rhs);
-      case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
-    }
-  } else if (cond->IsIntConstant()) {
-    HIntConstant* int_const = cond->AsIntConstant();
-    if (int_const->IsZero()) {
-      return graph->GetIntConstant(1);
-    } else {
-      DCHECK(int_const->IsOne());
-      return graph->GetIntConstant(0);
-    }
-  }
-  // General case when 'cond' is another instruction of type boolean,
-  // as verified by SSAChecker.
-  return new (allocator) HBooleanNot(cond);
-}
-
 void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
   DCHECK(block->EndsWithIf());
 
@@ -126,10 +92,7 @@
 
   HInstruction* replacement;
   if (NegatesCondition(true_value, false_value)) {
-    replacement = GetOppositeCondition(if_condition);
-    if (replacement->GetBlock() == nullptr) {
-      block->InsertInstructionBefore(replacement, if_instruction);
-    }
+    replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
   } else if (PreservesCondition(true_value, false_value)) {
     replacement = if_condition;
   } else {
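
(The removed helper is not lost: the call site now asks the graph to materialize the opposite condition via InsertOppositeCondition. The flip table itself is mechanical; a standalone restatement with plain enums, signed comparisons only, the unsigned variants elided:)

    #include <cstdio>

    enum Cond { kEQ, kNE, kLT, kLE, kGT, kGE };

    // Logical negation of a comparison, as in the removed switch.
    Cond Opposite(Cond c) {
      switch (c) {
        case kEQ: return kNE;
        case kNE: return kEQ;
        case kLT: return kGE;
        case kLE: return kGT;
        case kGT: return kLE;
        case kGE: return kLT;
      }
      return kEQ;  // unreachable for valid input
    }

    int main() { std::printf("%d\n", Opposite(kLT) == kGE); }  // prints 1
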
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index bcc3240..a448302 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -175,6 +176,24 @@
     return false;
   }
 
+  // Returns true if it is certain that this->bound > `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns true if it is certain that this->bound < `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greater of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
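
(GreaterThan/LessThan above only claim certainty when both bounds hang off the same instruction; anything else is conservatively "not comparable". A self-contained sketch with a simplified bound, an int* standing in for the instruction:)

    #include <cstdio>

    struct Bound {                 // simplified: instruction + constant
      const int* instruction;
      int constant;
      bool GreaterThan(Bound o) const {
        return instruction == o.instruction && constant > o.constant;
      }
      bool LessThan(Bound o) const {
        return instruction == o.instruction && constant < o.constant;
      }
    };

    int main() {
      int len = 0;  // stands in for an HArrayLength instruction
      Bound a{&len, -1}, b{&len, -3}, c{nullptr, 100};
      std::printf("%d %d\n", a.GreaterThan(b),   // 1: same base, -1 > -3
                             a.GreaterThan(c));  // 0: different bases, false
    }
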
@@ -252,157 +271,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +473,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for transforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1111,7 +499,9 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@
         first_constant_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
+        has_deoptimization_on_constant_subscripts_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@
     }
   }
 
+  void Finish() {
+    // Preserve SSA structure which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,23 +576,6 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
-    InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
-  }
-
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1328,17 +721,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1362,17 +744,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1398,38 +769,34 @@
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
+    bool try_dynamic_bce = true;
 
     if (!index->IsIntConstant()) {
+      // Non-constant subscript.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
       // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // Disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant subscript.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
@@ -1437,7 +804,7 @@
       }
       if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
@@ -1448,7 +815,7 @@
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1483,11 +850,11 @@
           ValueRange(GetGraph()->GetArena(), lower, upper);
       GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      TryDynamicBCE(bounds_check);
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1506,7 +873,7 @@
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1553,7 +920,7 @@
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
       IfCondition cmp = cond->GetCondition();
@@ -1562,42 +929,11 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
       }
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1611,7 +947,7 @@
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1713,19 +1049,19 @@
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1740,7 +1076,7 @@
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1764,9 +1100,12 @@
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+      return;
+    }
+    // If this instruction was added by AddCompareWithDeoptimization(), narrow
+    // the range accordingly in subsequent basic blocks.
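+    // For instance, a guard deoptimize(a.length <= 3) generated by
+    // AddCompareWithDeoptimization() implies a.length >= 4 afterwards, so the
+    // narrowed range covers the constant subscripts a[0] through a[3]
+    // (an illustrative sketch of the narrowing performed here).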
     HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
     HInstruction* instruction = less_than_or_equal->InputAt(0);
     if (instruction->IsArrayLength()) {
@@ -1780,6 +1119,35 @@
     }
   }
 
+  /**
+    * After null/bounds checks are eliminated, some invariant array references
+    * may be exposed underneath, which can be hoisted out of the loop to the
+    * preheader or, in combination with dynamic bce, the deoptimization block.
+    *
+    * for (int i = 0; i < n; i++) {
+    *                                <-------+
+    *   for (int j = 0; j < n; j++)          |
+    *     a[i][j] = 0;               --a[i]--+
+    * }
+    *
+    * Note: this optimization is no longer applied after deoptimization on array references
+    * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
+    * those cases it would be unsafe to hoist array references across their deoptimization
+    * instruction inside a loop.
+    */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+      if (loop->IsLoopInvariant(array_get->InputAt(0), false) &&
+          loop->IsLoopInvariant(array_get->InputAt(1), false)) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          HoistToPreheaderOrDeoptBlock(loop, array_get);
+        }
+      }
+    }
+  }
+
   void AddCompareWithDeoptimization(HInstruction* array_length,
                                     HIntConstant* const_instr,
                                     HBasicBlock* block) {
@@ -1801,6 +1169,9 @@
     block->InsertInstructionBefore(cond, bounds_check);
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+    // Flag that this kind of deoptimization on array references with constant
+    // subscripts has occurred to prevent further hoisting of these references.
+    has_deoptimization_on_constant_subscripts_ = true;
   }
 
   void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1844,21 +1215,425 @@
     }
   }
 
+  /**
+   * Returns true if static range analysis based on induction variables can determine that the
+   * bounds check on the given array range is always satisfied by the computed index range. The
+   * output parameter try_dynamic_bce is set to false if OOB is certain.
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HInstruction* context,
+                            HInstruction* index,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+      ValueRange index_range(GetGraph()->GetArena(),
+                             ValueBound(v1.instruction, v1.b_constant),
+                             ValueBound(v2.instruction, v2.b_constant));
+      // If analysis reveals a certain OOB, disable dynamic BCE.
+      *try_dynamic_bce = !index_range.GetLower().LessThan(array_range->GetLower()) &&
+                         !index_range.GetUpper().GreaterThan(array_range->GetUpper());
+      // Use analysis for static bce only if loop is finite.
+      return !needs_finite_test && index_range.FitsIn(array_range);
+    }
+    return false;
+  }
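+  // For instance, in a loop such as
+  //
+  //   for (int i = 0; i < a.length; i++) {
+  //     a[i] = 0;
+  //   }
+  //
+  // induction analysis yields the index range [0, a.length-1], which fits in the
+  // array range [0, a.length-1], so the bounds check is eliminated statically
+  // (an illustrative sketch; it assumes the finite-test requirement above is met).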
+
+  /**
+   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
+   * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
+   * will go out of range (we want to be rather certain of that given the slowdown of
+   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
+   * bounds checks and related null checks removed.
+   */
+  void TryDynamicBCE(HBoundsCheck* instruction) {
+    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+    HInstruction* index = instruction->InputAt(0);
+    HInstruction* length = instruction->InputAt(1);
+    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+    bool needs_finite_test = false;
+    bool needs_taken_test = false;
+    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+        induction_range_.CanGenerateCode(
+            instruction, index, &needs_finite_test, &needs_taken_test) &&
+        CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
+      HInstruction* lower = nullptr;
+      HInstruction* upper = nullptr;
+      // Generate the following unsigned comparisons
+      //     if (lower > upper)   deoptimize;
+      //     if (upper >= length) deoptimize;
+      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+      //     if (upper >= length) deoptimize;
+      // as runtime tests. By restricting dynamic bce to unit strides (with a maximum of 32-bit
+      // iterations) and by not combining accesses (e.g. a[i], a[i-3], a[i+5], etc.), these tests
+      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
+      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+      if (lower != nullptr) {
+        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+      }
+      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      ReplaceInstruction(instruction, index);
+    }
+  }
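+  // As an illustrative sketch, for
+  //
+  //   for (int i = lo; i < hi; i++) {
+  //     a[i] = 0;
+  //   }
+  //
+  // range analysis yields lower = lo and upper = hi - 1, so the generated
+  // runtime tests become (both unsigned):
+  //
+  //   if (lo > hi - 1)        deoptimize;
+  //   if (hi - 1 >= a.length) deoptimize;
+  //
+  // after which the bounds check on a[i] is removed from the loop body.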
+
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does the loop have early exits? If so, the full range may not be covered by the loop
+      // at runtime, and testing the range may trigger deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+        if (!block->Dominates(back_edge)) {
+          return false;
+        }
+      }
+      // Success!
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If the loop has already been analyzed for early exits, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // First-time early-exit analysis for this loop. Since the analysis requires scanning
+    // the full loop body, the results are stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
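+  // A hypothetical early-exit loop:
+  //
+  //   for (int i = 0; i < a.length; i++) {
+  //     if (a[i] == key) break;
+  //   }
+  //
+  // Here a block other than the loop header has a successor outside the loop
+  // (the break target), so the scan above reports early-exit and dynamic bce
+  // is not attempted for this loop.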
+
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(length, false)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreheaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(check, false)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsLoopInvariant(array, false)) {
+        // Generate: if (array == null) deoptimize;
+        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeopt(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we already forced the loop to be finite, allow directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction at
+      // this point) is the direct loop index (viz. a[i]), since then the runtime tests
+      // ensure the upper bound cannot cause an infinite loop.
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
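+  // For example, in
+  //
+  //   for (int i = 0; i <= u; i++) {
+  //     a[i] = 0;
+  //   }
+  //
+  // the index i appears directly in the loop-control condition (i <= u), so the
+  // runtime test on the upper bound also prevents the potentially infinite case
+  // u == MAX_INT from looping forever (a sketch of the reasoning above).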
+
+  /** Inserts a deoptimization test. */
+  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Hoists instruction out of the loop to preheader or deoptimization block. */
+  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    HBasicBlock* preheader = loop->GetPreHeader();
+    HBasicBlock* block = preheader;
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      block = it->second;
+    }
+    // Hoist the instruction.
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
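+  // For example, an array.length computation needed by the deoptimization tests
+  // is moved into the taken-test's true_block when one exists, rather than the
+  // preheader, so it only executes when the loop is actually entered.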
+
+  /**
+   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * The taken-test protects the range analysis evaluation code against any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   * Returns the block in which deoptimizations/invariants can be put.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop,
+                                                      bool needs_taken_test) {
+    // Not needed (can use preheader), or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test) {
+      return loop->GetPreHeader();
+    } else {
+      auto it = taken_test_loop_.find(loop_id);
+      if (it != taken_test_loop_.end()) {
+        return it->second;
+      }
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, it jumps around the deoptimization block.
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+    return true_block;
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *             header
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+             !it2.Done();
+             it2.Advance()) {
+          HInstruction* user = it2.Current()->GetUser();
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, it2.Current()->GetIndex());
+          }
+        }
+        // Scan all environment uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+             !it2.Done();
+             it2.Advance()) {
+          HEnvironment* user = it2.Current()->GetUser();
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
+    switch (type) {
+      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
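+  // For example, an HArrayLength hoisted into true_block gets, in the new
+  // preheader, the synthetic phi(array_length, 0): the real length when the
+  // loop is entered, zero otherwise (mirroring the taken-test example above).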
+
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
   // beneficial to add a compare instruction that has deoptimization fallback and
   // eliminate those bounds checks.
   bool need_to_revisit_block_;
 
+  // Flag that denotes whether deoptimization has occurred on array references
+  // with constant subscripts (see AddCompareWithDeoptimization()).
+  bool has_deoptimization_on_constant_subscripts_;
+
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1870,14 +1645,12 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
   // that value dominated by that instruction fits in that range. Range of that
   // value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   HBasicBlock* last_visited_block = nullptr;
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
@@ -1894,6 +1667,9 @@
     visitor.VisitBasicBlock(current);
     last_visited_block = current;
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..b9df686 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
       : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
@@ -34,6 +38,7 @@
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..dbeb1cc 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ed193c7..8e75bdc 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -359,18 +359,10 @@
           // need a strategy for splitting exceptional edges. We split the block
           // after the move-exception (if present) and mark the first part not
           // throwing. The normal-flow edge between them will be split later.
-          HInstruction* first_insn = block->GetFirstInstruction();
-          if (first_insn->IsLoadException()) {
-            // Catch block starts with a LoadException. Split the block after
-            // the StoreLocal and ClearException which must come after the load.
-            DCHECK(first_insn->GetNext()->IsStoreLocal());
-            DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-            throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
-          } else {
-            // Catch block does not load the exception. Split at the beginning
-            // to create an empty catch block.
-            throwing_block = block->SplitBefore(first_insn);
-          }
+          throwing_block = block->SplitCatchBlockAfterMoveException();
+          // Move-exception does not throw and the block has throwing instructions,
+          // so it must have been possible to split it.
+          DCHECK(throwing_block != nullptr);
         }
 
         try_block_info.Put(throwing_block->GetBlockId(),
@@ -743,6 +735,79 @@
   }
 }
 
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special-case invoke-super, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time compilation),
+      // bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    uint16_t vtable_index = resolved_method->GetMethodIndex();
+    ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+        vtable_index, class_linker->GetImagePointerSize());
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      // could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
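
In outline, the new `ResolveMethod` helper performs four checks in order: class-linker resolution, an access check against the compiling class, vtable rewriting for invoke-super, and an incompatible-class-change check. The condensed sketch below mirrors that control flow with toy placeholder types; none of these are ART's real signatures, and the vtable and access rules are deliberately simplified:

    #include <cstdint>

    // Toy model of the resolution pipeline; every type and rule here is a
    // simplified stand-in, not ART's real API.
    enum class InvokeType { kStatic, kDirect, kVirtual, kSuper, kInterface };

    struct Method {
      bool is_public = false;
      bool is_invokable = false;         // e.g. not abstract
      bool matches_invoke_type = false;  // no incompatible class change
    };

    struct Class {
      Class* super_class = nullptr;
      Method* vtable[8] = {};            // assume vtable_index is in range
      bool CanAccess(const Method&) const { return true; }  // simplified
    };

    // Mirrors the order of checks in the new HGraphBuilder::ResolveMethod.
    Method* ResolveForInvoke(Class* compiling_class,
                             Method* resolved,  // class-linker result
                             uint16_t vtable_index,
                             InvokeType type) {
      if (resolved == nullptr) {
        return nullptr;                              // resolution failed
      }
      if (compiling_class == nullptr) {
        if (!resolved->is_public) return nullptr;    // unknown referrer
      } else if (!compiling_class->CanAccess(*resolved)) {
        return nullptr;                              // access check failed
      }
      if (type == InvokeType::kSuper) {
        if (compiling_class == nullptr) return nullptr;  // need the real super
        // invoke-super dispatches through the referrer's super class vtable.
        Method* actual = compiling_class->super_class->vtable[vtable_index];
        if (actual == nullptr || !actual->is_invokable) {
          return nullptr;                            // e.g. abstract target
        }
        resolved = actual;
      }
      if (!resolved->matches_invoke_type) {
        return nullptr;                              // ICCE case
      }
      return resolved;
    }
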
+
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -750,22 +815,18 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
   const char* descriptor = dex_file_->GetMethodShorty(method_idx);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
 
   // Remove the return type from the 'proto'.
   size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
+  if (invoke_type != kStatic) {  // instance call
     // One extra argument for 'this'.
     number_of_arguments++;
   }
 
   MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
 
   // Special handling for string init.
   int32_t string_init_offset = 0;
@@ -788,7 +849,7 @@
         method_idx,
         target_method,
         dispatch_info,
-        original_invoke_type,
+        invoke_type,
         kStatic /* optimized_invoke_type */,
         HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
     return HandleStringInit(invoke,
@@ -799,23 +860,16 @@
                             descriptor);
   }
 
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (resolved_method == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
                                                      dex_pc,
                                                      method_idx,
-                                                     original_invoke_type);
+                                                     invoke_type);
     return HandleInvoke(invoke,
                         number_of_vreg_arguments,
                         args,
@@ -825,21 +879,26 @@
                         nullptr /* clinit_check */);
   }
 
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
   HInvoke* invoke = nullptr;
 
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
     // By default, consider that the called method implicitly requires
     // an initialization check of its declaring method.
     HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
         = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -855,24 +914,26 @@
                                                 method_idx,
                                                 target_method,
                                                 dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
+                                                invoke_type,
+                                                invoke_type,
                                                 clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeVirtual(arena_,
                                          number_of_arguments,
                                          return_type,
                                          dex_pc,
                                          method_idx,
-                                         table_index);
+                                         resolved_method->GetMethodIndex());
   } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeInterface(arena_,
                                            number_of_arguments,
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           table_index);
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -884,26 +945,106 @@
                       clinit_check);
 }
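
With resolution done up front, the tail of `BuildInvoke` now keys node selection off a single `invoke_type` instead of the old original/optimized pair: static, direct, and super calls all become `HInvokeStaticOrDirect`, virtual calls carry the resolved vtable index, and interface calls carry the resolved dex method index. A hedged sketch of that dispatch, using invented stand-in node types:

    #include <cstdint>
    #include <memory>

    // Stand-ins for the HInvoke node hierarchy; not ART's real classes.
    struct Invoke { virtual ~Invoke() = default; };
    struct InvokeStaticOrDirect : Invoke {};
    struct InvokeVirtual : Invoke {
      explicit InvokeVirtual(uint16_t index) : vtable_index(index) {}
      uint16_t vtable_index;
    };
    struct InvokeInterface : Invoke {
      explicit InvokeInterface(uint32_t index) : dex_method_index(index) {}
      uint32_t dex_method_index;
    };
    enum class InvokeType { kStatic, kDirect, kSuper, kVirtual, kInterface };

    // One switch over the single invoke type replaces the old
    // original/optimized invoke-type pair.
    std::unique_ptr<Invoke> MakeInvoke(InvokeType type,
                                       uint16_t vtable_index,
                                       uint32_t dex_method_index) {
      switch (type) {
        case InvokeType::kStatic:
        case InvokeType::kDirect:
        case InvokeType::kSuper:  // kSuper is now handled here, not rejected.
          return std::make_unique<InvokeStaticOrDirect>();
        case InvokeType::kVirtual:
          return std::make_unique<InvokeVirtual>(vtable_index);
        case InvokeType::kInterface:
          return std::make_unique<InvokeInterface>(dex_method_index);
      }
      return nullptr;  // Unreachable with a well-formed enum value.
    }
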
 
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
-      uint32_t dex_pc,
-      uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+  bool finalizable;
+  bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+  // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, the class may not be resolved and could therefore
+  // still be finalizable.
+  QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+      ? kQuickAllocObject
+      : kQuickAllocObjectInitialized;
+
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<3> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
+  Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
   const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
       outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+  if (outer_dex_cache.Get() != dex_cache.Get()) {
+    // We currently do not support inlining allocations across dex files.
+    return false;
+  }
+
+  HLoadClass* load_class = new (arena_) HLoadClass(
+      graph_->GetCurrentMethod(),
+      type_index,
+      outer_dex_file,
+      IsOutermostCompilingClass(type_index),
+      dex_pc,
+      /*needs_access_check*/ can_throw,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+
+  current_block_->AddInstruction(load_class);
+  HInstruction* cls = load_class;
+  if (!IsInitialized(resolved_class)) {
+    cls = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(cls);
+  }
+
+  current_block_->AddInstruction(new (arena_) HNewInstance(
+      cls,
+      graph_->GetCurrentMethod(),
+      dex_pc,
+      type_index,
+      *dex_compilation_unit_->GetDexFile(),
+      can_throw,
+      finalizable,
+      entrypoint));
+  return true;
+}
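
The entrypoint choice at the top of `BuildNewInstance` is worth spelling out: only the generic allocation entrypoint knows how to register finalizable objects, and when an access check is needed the class may be unresolved and thus possibly finalizable, so either condition forces the generic path. A tiny sketch of that decision (enum names here are illustrative; the patch uses kQuickAllocObject and kQuickAllocObjectInitialized):

    enum class AllocEntrypoint { kGeneric, kInitializedOnly };

    AllocEntrypoint ChooseAllocEntrypoint(bool finalizable, bool needs_access_check) {
      // Only the generic path registers finalizable objects, and a class behind
      // an access check may be unresolved and hence possibly finalizable.
      return (finalizable || needs_access_check) ? AllocEntrypoint::kGeneric
                                                 : AllocEntrypoint::kInitializedOnly;
    }
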
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
+
+bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+  if (cls.Get() == nullptr) {
+    return false;
+  }
+
+  // `CanAssumeClassIsLoaded` will return true if we're JITting; for AOT
+  // compilation it checks whether the class is in an image.
+  if (cls->IsInitialized() &&
+      compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+    return true;
+  }
+
+  if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  // TODO: We should walk over the inlined methods, but we don't pass
+  //       that information to the builder.
+  if (IsSubClass(GetCompilingClass(), cls.Get())) {
+    return true;
+  }
+
+  return false;
+}
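
`IsInitialized` answers "may the compiler skip the class-initialization check" with three ordered tests: the class is initialized and guaranteed loaded at runtime, or the outermost compiling class is a subclass of it, or the (possibly inlined) compiling class is. A toy model of the same ordering, with a simplified subclass walk standing in for ART's runtime checks:

    // Toy version of HGraphBuilder::IsInitialized; fields and the subclass
    // walk are simplified placeholders for ART's runtime checks.
    struct Klass {
      const Klass* super = nullptr;
      bool initialized = false;
      bool loaded_at_runtime = false;  // models CanAssumeClassIsLoaded
      bool IsSuperOf(const Klass* sub) const {
        for (const Klass* k = sub; k != nullptr; k = k->super) {
          if (k == this) return true;
        }
        return false;
      }
    };

    bool CanSkipClinitCheck(const Klass* cls,
                            const Klass* outermost_compiling_class,
                            const Klass* compiling_class) {
      if (cls == nullptr) return false;
      // 1) Initialized now and guaranteed loaded at runtime (JIT, or in image).
      if (cls->initialized && cls->loaded_at_runtime) return true;
      // 2) The outermost compiling class is a subclass: cls's clinit ran first.
      if (cls->IsSuperOf(outermost_compiling_class)) return true;
      // 3) Same test for the compiling class itself (covers some inlining).
      return cls->IsSuperOf(compiling_class);
    }
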
+
+HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
+      uint32_t dex_pc,
+      ArtMethod* resolved_method,
+      uint32_t method_idx,
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, *dex_compilation_unit_->GetDexFile())));
+  Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+  Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
   // The index at which the method's class is stored in the DexCache's type array.
   uint32_t storage_index = DexFile::kDexNoIndex;
@@ -921,41 +1062,21 @@
 
   HClinitCheck* clinit_check = nullptr;
 
-  if (!outer_class->IsInterface()
-      && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) {
-    // If the outer class is the declaring class or a subclass
-    // of the declaring class, no class initialization is needed
-    // before the static method call.
-    // Note that in case of inlining, we do not need to add clinit checks
-    // to calls that satisfy this subclass check with any inlined methods. This
-    // will be detected by the optimization passes.
+  if (IsInitialized(resolved_method_class)) {
     *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
   } else if (storage_index != DexFile::kDexNoIndex) {
-    // If the method's class type index is available, check
-    // whether we should add an explicit class initialization
-    // check for its declaring class before the static method call.
-
-    // TODO: find out why this check is needed.
-    bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-        *outer_compilation_unit_->GetDexFile(), storage_index);
-    bool is_initialized =
-        resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
-    if (is_initialized) {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
-    } else {
-      *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
-      HLoadClass* load_class = new (arena_) HLoadClass(
-          graph_->GetCurrentMethod(),
-          storage_index,
-          *dex_compilation_unit_->GetDexFile(),
-          is_outer_class,
-          dex_pc,
-          /*needs_access_check*/ false);
-      current_block_->AddInstruction(load_class);
-      clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
-      current_block_->AddInstruction(clinit_check);
-    }
+    *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+    HLoadClass* load_class = new (arena_) HLoadClass(
+        graph_->GetCurrentMethod(),
+        storage_index,
+        outer_dex_file,
+        is_outer_class,
+        dex_pc,
+        /*needs_access_check*/ false,
+        compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+    current_block_->AddInstruction(load_class);
+    clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+    current_block_->AddInstruction(clinit_check);
   }
   return clinit_check;
 }
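
The rewritten `ProcessClinitCheckForInvoke` therefore reduces to a small decision table: skip the check when `IsInitialized` says so, emit an explicit `HLoadClass` plus `HClinitCheck` pair when the type index is available in the outer dex file, and otherwise fall back to the implicit check in the entrypoint. A compact sketch of that table (names are invented placeholders):

    enum class ClinitCheckRequirement { kNone, kExplicit, kImplicit };

    ClinitCheckRequirement ChooseClinitCheck(bool class_known_initialized,
                                             bool type_index_available) {
      if (class_known_initialized) {
        return ClinitCheckRequirement::kNone;      // no check needed at all
      }
      if (type_index_available) {
        return ClinitCheckRequirement::kExplicit;  // emit HLoadClass + HClinitCheck
      }
      return ClinitCheckRequirement::kImplicit;    // entrypoint does the check
    }
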
@@ -1006,7 +1127,9 @@
     return false;
   }
 
-  if (invoke->IsInvokeStaticOrDirect()) {
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
     invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
     (*argument_index)++;
   }
@@ -1278,7 +1401,7 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<4> hs(soa.Self());
+  StackHandleScope<5> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
           soa.Self(), *dex_compilation_unit_->GetDexFile())));
@@ -1324,26 +1447,26 @@
     }
   }
 
-  // TODO: find out why this check is needed.
-  bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
-      *outer_compilation_unit_->GetDexFile(), storage_index);
-  bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
+  bool is_in_cache =
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
   HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
                                                  storage_index,
-                                                 *dex_compilation_unit_->GetDexFile(),
+                                                 outer_dex_file,
                                                  is_outer_class,
                                                  dex_pc,
-                                                 /*needs_access_check*/ false);
+                                                 /*needs_access_check*/ false,
+                                                 is_in_cache);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
-  if (!is_initialized && !is_outer_class) {
+
+  Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+  if (!IsInitialized(klass)) {
     cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
 
-  uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+  uint16_t class_def_index = klass->GetDexClassDefIndex();
   if (is_put) {
     // We need to keep the class alive before loading the value.
     Temporaries temps(graph_);
@@ -1455,7 +1578,8 @@
                                         uint32_t* args,
                                         uint32_t register_index) {
   HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
-  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+  bool finalizable;
+  QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
       ? kQuickAllocArrayWithAccessCheck
       : kQuickAllocArray;
   HInstruction* object = new (arena_) HNewArray(length,
@@ -1606,19 +1730,20 @@
 
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScope<2> hs(soa.Self());
+  const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
-      dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
+      dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
   Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
 
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
   HLoadClass* cls = new (arena_) HLoadClass(
       graph_->GetCurrentMethod(),
       type_index,
-      *dex_compilation_unit_->GetDexFile(),
+      dex_file,
       IsOutermostCompilingClass(type_index),
       dex_pc,
-      !can_access);
+      !can_access,
+      compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
   current_block_->AddInstruction(cls);
 
   // The class needs a temporary before being used by the type check.
@@ -1635,9 +1760,9 @@
   }
 }
 
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const {
+bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
   return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
-      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index);
+      dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
 }
 
 void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
@@ -2514,16 +2639,9 @@
         current_block_->AddInstruction(fake_string);
         UpdateLocal(register_index, fake_string, dex_pc);
       } else {
-        QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
-            ? kQuickAllocObjectWithAccessCheck
-            : kQuickAllocObject;
-
-        current_block_->AddInstruction(new (arena_) HNewInstance(
-            graph_->GetCurrentMethod(),
-            dex_pc,
-            type_index,
-            *dex_compilation_unit_->GetDexFile(),
-            entrypoint));
+        if (!BuildNewInstance(type_index, dex_pc)) {
+          return false;
+        }
         UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
       }
       break;
@@ -2532,7 +2650,8 @@
     case Instruction::NEW_ARRAY: {
       uint16_t type_index = instruction.VRegC_22c();
       HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
-      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+      bool finalizable;
+      QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
           ? kQuickAllocArrayWithAccessCheck
           : kQuickAllocArray;
       current_block_->AddInstruction(new (arena_) HNewArray(length,
@@ -2750,10 +2869,11 @@
       current_block_->AddInstruction(new (arena_) HLoadClass(
           graph_->GetCurrentMethod(),
           type_index,
-          *dex_compilation_unit_->GetDexFile(),
+          *dex_file_,
           IsOutermostCompilingClass(type_index),
           dex_pc,
-          !can_access));
+          !can_access,
+          compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
       break;
     }
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9eaa4b6..c3979f3 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -138,7 +138,10 @@
   HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
   void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc);
   void InitializeParameters(uint16_t number_of_parameters);
-  bool NeedsAccessCheck(uint32_t type_index) const;
+
+  // Returns whether the current method needs an access check for the type.
+  // The out parameter `finalizable` is set to whether the type is finalizable.
+  bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
 
   template<typename T>
   void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
@@ -302,8 +305,21 @@
 
   HClinitCheck* ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* method,
       uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Build an HNewInstance instruction. Returns whether it was successfully built.
+  bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+  // Return whether the compiler can assume `cls` is initialized.
+  bool IsInitialized(Handle<mirror::Class> cls) const
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to resolve a method using the class linker. Returns null if the method
+  // could not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
 
   ArenaAllocator* const arena_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a1bb5e0..0baa0e3 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -42,7 +42,7 @@
 
 #include "compiled_method.h"
 #include "dex/verified_method.h"
-#include "driver/dex_compilation_unit.h"
+#include "driver/compiler_driver.h"
 #include "gc_map_builder.h"
 #include "graph_visualizer.h"
 #include "intrinsics.h"
@@ -208,6 +208,7 @@
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
   for (SlowPathCode* slow_path : slow_paths_) {
+    current_slow_path_ = slow_path;
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
     }
@@ -216,6 +217,7 @@
       disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
     }
   }
+  current_slow_path_ = nullptr;
 }
 
 void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
@@ -308,7 +310,7 @@
 
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
-                                             size_t maximum_number_of_live_fp_registers,
+                                             size_t maximum_number_of_live_fpu_registers,
                                              size_t number_of_out_slots,
                                              const ArenaVector<HBasicBlock*>& block_order) {
   block_order_ = &block_order;
@@ -322,14 +324,14 @@
       && IsLeafMethod()
       && !RequiresCurrentMethod()) {
     DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
-    DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+    DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
         number_of_spill_slots * kVRegSize
         + number_of_out_slots * kVRegSize
         + maximum_number_of_live_core_registers * GetWordSize()
-        + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+        + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
@@ -381,11 +383,11 @@
     HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
     switch (call->GetMethodLoadKind()) {
       case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
+        locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
         break;
       case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
         locations->AddTemp(visitor->GetMethodLocation());
-        locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+        locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
         break;
       default:
         locations->AddTemp(visitor->GetMethodLocation());
@@ -545,15 +547,19 @@
   }
 }
 
+// TODO: Remove argument `code_generator_supports_read_barrier` when
+// all code generators have read barrier support.
 void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
                                                    Location runtime_type_index_location,
-                                                   Location runtime_return_location) {
+                                                   Location runtime_return_location,
+                                                   bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
       ? LocationSummary::kCall
-      : (cls->CanCallRuntime()
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall);
+      : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
+          cls->CanCallRuntime())
+            ? LocationSummary::kCallOnSlowPath
+            : LocationSummary::kNoCall);
   LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
   if (cls->NeedsAccessCheck()) {
     locations->SetInAt(0, Location::NoLocation());
@@ -787,9 +793,10 @@
 }
 
 void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
   const std::vector<uint8_t>& gc_map_raw =
-      dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
+      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
+          ->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
 
   uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
@@ -911,19 +918,22 @@
   vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
 }
 
-void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
-  uint32_t size = stack_map_stream_.PrepareForFillIn();
-  data->resize(size);
-  MemoryRegion region(data->data(), size);
+size_t CodeGenerator::ComputeStackMapsSize() {
+  return stack_map_stream_.PrepareForFillIn();
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region) {
   stack_map_stream_.FillIn(region);
 }
 
 void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
                                           uintptr_t native_pc_begin,
                                           uintptr_t native_pc_end) {
-  if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
-    src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
-                                    static_cast<int32_t>(dex_pc)}));
+  if (compiler_options_.GetGenerateDebugInfo() &&
+      dex_pc != kNoDexPc &&
+      native_pc_begin != native_pc_end) {
+    src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
+                                   static_cast<int32_t>(dex_pc)}));
   }
 }
 
@@ -1314,21 +1324,38 @@
   // coherent with the runtime call generated, and that the GC side effect is
   // set when required.
   if (slow_path == nullptr) {
-    DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName();
+    DCHECK(instruction->GetLocations()->WillCall())
+        << "instruction->DebugName()=" << instruction->DebugName();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
-        << instruction->DebugName() << instruction->GetSideEffects().ToString();
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
   } else {
     DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
-        << instruction->DebugName() << slow_path->GetDescription();
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
            // Control flow would not come back into the code if a fatal slow
            // path is taken, so we do not care if it triggers GC.
            slow_path->IsFatal() ||
            // HDeoptimize is a special case: we know we are not coming back from
            // it into the code.
-           instruction->IsDeoptimize())
-        << instruction->DebugName() << instruction->GetSideEffects().ToString()
-        << slow_path->GetDescription();
+           instruction->IsDeoptimize() ||
+           // When read barriers are enabled, some instructions use a
+           // slow path to emit a read barrier, which does not trigger
+           // GC, is not fatal, nor is emitted by HDeoptimize
+           // instructions.
+           (kEmitCompilerReadBarrier &&
+            (instruction->IsInstanceFieldGet() ||
+             instruction->IsStaticFieldGet() ||
+             instruction->IsArraySet() ||
+             instruction->IsArrayGet() ||
+             instruction->IsLoadClass() ||
+             instruction->IsLoadString() ||
+             instruction->IsInstanceOf() ||
+             instruction->IsCheckCast())))
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
   }
 
   // Check the coherency of leaf information.
@@ -1340,11 +1367,12 @@
 }
 
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         // If the register holds an object, update the stack mask.
         if (locations->RegisterContainsObject(i)) {
           locations->SetStackBit(stack_offset / kVRegSize);
@@ -1359,7 +1387,7 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
         DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         saved_fpu_stack_offsets_[i] = stack_offset;
@@ -1370,12 +1398,14 @@
 }
 
 void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
       }
     }
@@ -1383,8 +1413,9 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
       }
     }
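
Save and restore stay symmetric because both walks advance `stack_offset` through the registers in the same fixed order (core first, then FPU); the added DCHECKs merely assert the offset never leaves the slow-path spill area. A minimal model of that replayed-offset idiom:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Minimal model: values are saved and restored in one fixed order, so
    // the same offset sequence is reproduced on both sides.
    struct SpillArea {
      std::vector<long> slots;
      size_t Save(size_t offset, long value) {
        assert(offset / sizeof(long) < slots.size());  // stay inside the area
        slots[offset / sizeof(long)] = value;
        return offset + sizeof(long);
      }
      size_t Restore(size_t offset, long* value) {
        assert(offset / sizeof(long) < slots.size());
        *value = slots[offset / sizeof(long)];
        return offset + sizeof(long);
      }
    };

    int main() {
      SpillArea area{std::vector<long>(4)};
      long r0 = 7, r1 = 9;
      size_t off = 0;
      off = area.Save(off, r0);
      off = area.Save(off, r1);
      long t0 = 0, t1 = 0;
      off = 0;                       // restore replays the same offsets
      off = area.Restore(off, &t0);
      off = area.Restore(off, &t1);
      assert(t0 == 7 && t1 == 9);
      return 0;
    }
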
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47b6f30..114d97b 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/bit_field.h"
+#include "compiled_method.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
 #include "graph_visualizer.h"
@@ -51,13 +52,9 @@
 
 class Assembler;
 class CodeGenerator;
-class DexCompilationUnit;
+class CompilerDriver;
 class LinkerPatch;
 class ParallelMoveResolver;
-class SrcMapElem;
-template <class Alloc>
-class SrcMap;
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
 
 class CodeAllocator {
  public:
@@ -204,7 +201,7 @@
   virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
   void InitializeCodeGeneration(size_t number_of_spill_slots,
                                 size_t maximum_number_of_live_core_registers,
-                                size_t maximum_number_of_live_fp_registers,
+                                size_t maximum_number_of_live_fpu_registers,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
   int32_t GetStackSlot(HLocal* local) const;
@@ -253,6 +250,15 @@
   // Returns whether we should split long moves in parallel moves.
   virtual bool ShouldSplitLongMoves() const { return false; }
 
+  size_t GetNumberOfCoreCalleeSaveRegisters() const {
+    return POPCOUNT(core_callee_save_mask_);
+  }
+
+  size_t GetNumberOfCoreCallerSaveRegisters() const {
+    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
+    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
+  }
+
   bool IsCoreCalleeSaveRegister(int reg) const {
     return (core_callee_save_mask_ & (1 << reg)) != 0;
   }
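
The two new helpers are just a population count over the callee-save mask and its complement against the total register count. A worked example with an illustrative 16-register target and an invented mask (not ARM's real one):

    #include <bitset>
    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uint32_t kNumCoreRegisters = 16;           // illustrative target
      constexpr uint32_t core_callee_save_mask = 0x0FF0u;  // r4..r11 callee-save

      // POPCOUNT in the patch; std::bitset::count is the portable equivalent.
      uint32_t callee_saves = std::bitset<32>(core_callee_save_mask).count();
      uint32_t caller_saves = kNumCoreRegisters - callee_saves;

      assert(callee_saves == 8 && caller_saves == 8);
      return 0;
    }
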
@@ -284,13 +290,12 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
-
   void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
   void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
   void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
-  void BuildStackMaps(ArenaVector<uint8_t>* vector);
+      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
+  void BuildStackMaps(MemoryRegion region);
+  size_t ComputeStackMapsSize();
 
   bool IsBaseline() const {
     return is_baseline_;
@@ -420,7 +425,8 @@
   // TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
   static void CreateLoadClassLocationSummary(HLoadClass* cls,
                                              Location runtime_type_index_location,
-                                             Location runtime_return_location);
+                                             Location runtime_return_location,
+                                             bool code_generator_supports_read_barrier = false);
 
   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
 
@@ -446,6 +452,10 @@
   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
 
+  const ArenaVector<SrcMapElem>& GetSrcMappingTable() const {
+    return src_map_;
+  }
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
@@ -488,8 +498,9 @@
         stats_(stats),
         graph_(graph),
         compiler_options_(compiler_options),
-        src_map_(nullptr),
+        src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        current_slow_path_(nullptr),
         current_block_index_(0),
         is_leaf_(true),
         requires_current_method_(false) {
@@ -557,6 +568,10 @@
     return raw_pointer_to_labels_array + block->GetBlockId();
   }
 
+  SlowPathCode* GetCurrentSlowPath() {
+    return current_slow_path_;
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -602,9 +617,12 @@
   const CompilerOptions& compiler_options_;
 
   // Native to dex_pc map used for native debugging/profiling tools.
-  DefaultSrcMap* src_map_;
+  ArenaVector<SrcMapElem> src_map_;
   ArenaVector<SlowPathCode*> slow_paths_;
 
+  // The current slow path that we're generating code for.
+  SlowPathCode* current_slow_path_;
+
   // The current block index in `block_order_` of the block
   // we are generating code for.
   size_t current_block_index_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3dc3b7f..ac6b5e8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm {
 
 static bool ExpectedPairLayout(Location location) {
@@ -74,6 +77,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -98,6 +102,7 @@
     }
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -120,6 +125,7 @@
     SaveLiveRegisters(codegen, instruction_->GetLocations());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ b(GetReturnLabel());
@@ -176,6 +182,7 @@
         Primitive::kPrimInt);
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -211,6 +218,11 @@
         ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
         : QUICK_ENTRY_POINT(pInitializeType);
     arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -257,6 +269,7 @@
     __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
     arm_codegen->InvokeRuntime(
         QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
 
     RestoreLiveRegisters(codegen, locations);
@@ -286,15 +299,6 @@
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -315,6 +319,8 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
@@ -322,6 +328,7 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -354,6 +361,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -396,6 +404,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ b(GetExitLabel());
   }
@@ -408,6 +417,221 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path), we
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::arm::Thumb2Assembler::Lsl and
+          // art::arm::Thumb2Assembler::AddConstant below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddConstant(index_reg, index_reg, offset_);
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair, the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+    }
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
+
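The long comment above reduces to this: a callee-save `index_reg` is not preserved by `SaveLiveRegisters`, so the slow path parks the index in a caller-save register that differs from `ref` and `obj`, and the scan cannot fail as long as there are more than two core caller-saves. A self-contained sketch of that search, with an invented mask and register count rather than ARM's real ones:

    #include <cassert>
    #include <cstddef>

    // Simplified search: pick any caller-save register different from two
    // reserved ones. Mask and register count are illustrative, not ARM's.
    constexpr size_t kNumRegs = 16;
    constexpr unsigned kCalleeSaveMask = 0x0FF0u;  // bits 4..11 callee-save

    bool IsCalleeSave(size_t reg) { return (kCalleeSaveMask & (1u << reg)) != 0; }

    int FindAvailableCallerSave(size_t ref, size_t obj) {
      for (size_t i = 0; i < kNumRegs; ++i) {
        if (i != ref && i != obj && !IsCalleeSave(i)) {
          return static_cast<int>(i);
        }
      }
      return -1;  // cannot happen if there are more than two caller-saves
    }

    int main() {
      int free_reg = FindAvailableCallerSave(/*ref=*/0, /*obj=*/1);
      assert(free_reg == 2);  // first caller-save distinct from r0/r1
      return 0;
    }
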
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ b(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
+};
+
 #undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
@@ -500,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -581,7 +807,7 @@
       LOG(FATAL) << "Unreachable type " << type;
   }
 
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
@@ -820,7 +1046,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const {
@@ -847,7 +1073,7 @@
     }
 
     case Primitive::kPrimVoid:
-      return Location();
+      return Location::NoLocation();
   }
 
   UNREACHABLE();
@@ -1240,26 +1466,19 @@
   __ b(true_label, final_condition);
 }
 
-void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                               HCondition* condition,
-                                                               Label* true_target,
-                                                               Label* false_target,
-                                                               Label* always_true_target) {
+void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
+                                                               Label* true_target_in,
+                                                               Label* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+  Label fallthrough_target;
+  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong:
@@ -1278,103 +1497,125 @@
       LOG(FATAL) << "Unexpected compare type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ b(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
 
 void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction,
+                                                        size_t condition_input_index,
                                                         Label* true_target,
-                                                        Label* false_target,
-                                                        Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
+                                                        Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
     // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ b(always_true_target);
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ b(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ b(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
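+  //
+  // For example, for an `HIf` whose true successor is the next block
+  // (pattern 1), only `false_target` is non-null and a single branch on
+  // the opposite condition is emitted, roughly:
+  //
+  //   __ cmp(left, ShifterOperand(right));
+  //   __ b(false_target, ARMCondition(condition->GetOppositeCondition()));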
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    // Condition has been materialized, compare the output to 0.
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+    DCHECK(cond_val.IsRegister());
+    if (true_target == nullptr) {
+      __ CompareAndBranchIfZero(cond_val.AsRegister<Register>(), false_target);
+    } else {
+      __ CompareAndBranchIfNonZero(cond_val.AsRegister<Register>(), true_target);
     }
   } else {
-    if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-      // Condition has been materialized, compare the output to 0
-      DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
-      __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
-                                   true_target);
-    } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-      Primitive::Type type =
-          cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
-                                     true_target, false_target, always_true_target);
-        return;
-      }
+    // Condition has not been materialized. Use its inputs as the comparison and
+    // its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
 
-      LocationSummary* locations = cond->GetLocations();
-      DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0);
-      Register left = locations->InAt(0).AsRegister<Register>();
-      Location right = locations->InAt(1);
-      if (right.IsRegister()) {
-        __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
-      } else {
-        DCHECK(right.IsConstant());
-        GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-      }
-      __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    LocationSummary* locations = cond->GetLocations();
+    DCHECK(locations->InAt(0).IsRegister());
+    Register left = locations->InAt(0).AsRegister<Register>();
+    Location right = locations->InAt(1);
+    if (right.IsRegister()) {
+      __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+    } else {
+      DCHECK(right.IsConstant());
+      GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+    }
+    if (true_target == nullptr) {
+      __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+    } else {
+      __ b(true_target, ARMCondition(condition->GetCondition()));
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to
+  // `true_target` has already been emitted (as in case 2) and we need an
+  // unconditional jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ b(false_target);
   }
 }
 
 void LocationsBuilderARM::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCode* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathARM(deoptimize);
+  SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderARM::VisitCondition(HCondition* cond) {
@@ -1683,10 +1924,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache arrays, the invoke has an extra input: the
+  // PC-relative address base.
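+  // (That base is materialized by a separate HArmDexCacheArraysBase
+  // instruction; see the `dex_cache_arrays_base_labels_` map initialized
+  // in the constructor.)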
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -1747,29 +1996,39 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
-  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
-                   invoke->GetDexMethodIndex());
+  // Set the hidden argument. It is safe to do this here, as R12
+  // won't be modified thereafter, before the `blx` (call) instruction.
+  DCHECK_EQ(R12, hidden_reg);
+  __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and the current concurrent
+  // copying collector keeps the from-space memory intact/accessible
+  // until the end of the marking phase (though this may change in
+  // the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kArmWordSize).Int32Value();
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -2173,6 +2432,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2181,6 +2441,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2226,6 +2487,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2680,7 +2942,7 @@
     case Primitive::kPrimInt: {
       if (div->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+        locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
         int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
         if (abs_imm <= 1) {
@@ -2748,6 +3010,7 @@
         DCHECK_EQ(R0, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
@@ -2762,6 +3025,7 @@
       DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
 
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
@@ -2804,7 +3068,7 @@
     case Primitive::kPrimInt: {
       if (rem->InputAt(1)->IsConstant()) {
         locations->SetInAt(0, Location::RequiresRegister());
-        locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+        locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
         int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
         if (abs_imm <= 1) {
@@ -2890,22 +3154,26 @@
         DCHECK_EQ(R1, out.AsRegister<Register>());
 
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       break;
     }
 
     case Primitive::kPrimFloat: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
 
     case Primitive::kPrimDouble: {
       codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
 
@@ -2975,17 +3243,29 @@
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
-      // Make the output overlap, as it will be used to hold the masked
-      // second input.
-      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        // Make the output overlap, as it will be used to hold the masked
+        // second input.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->AddTemp(Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        // For simplicity, use kOutputOverlap even though we only require that
+        // low registers don't clash with high registers, which the register
+        // allocator currently guarantees.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
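+        // (The temp holds intermediate values, e.g. the `32 - n` shift
+        // amount, in the register-shift sequences emitted below.)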
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
       break;
     }
     default:
@@ -3006,9 +3286,9 @@
     case Primitive::kPrimInt: {
       Register out_reg = out.AsRegister<Register>();
       Register first_reg = first.AsRegister<Register>();
-      // Arm doesn't mask the shift count so we need to do it ourselves.
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
+        // Arm doesn't mask the shift count, so we need to do it ourselves.
         __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
         if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, out_reg);
@@ -3036,57 +3316,115 @@
       Register o_h = out.AsRegisterPairHigh<Register>();
       Register o_l = out.AsRegisterPairLow<Register>();
 
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-
       Register high = first.AsRegisterPairHigh<Register>();
       Register low = first.AsRegisterPairLow<Register>();
 
-      Register second_reg = second.AsRegister<Register>();
+      if (second.IsRegister()) {
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
 
-      if (op->IsShl()) {
-        __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the high part
-        __ Lsl(o_h, high, o_l);
-        // Shift the low part and `or` what overflew on the high part
-        __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ Lsr(temp, low, temp);
-        __ orr(o_h, o_h, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the high part
-        __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsl(o_h, low, temp, PL);
-        // Shift the low part
-        __ Lsl(o_l, low, o_l);
-      } else if (op->IsShr()) {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // Shift the low part
-        __ Lsr(o_l, low, o_h);
-        // Shift the high part and `or` what underflew on the low part
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        // If the shift is > 32 bits, override the low part
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Asr(o_l, high, temp, PL);
-        // Shift the high part
-        __ Asr(o_h, high, o_h);
+        Register second_reg = second.AsRegister<Register>();
+
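+        // Rough sketch of the register-shift scheme below, for Shl (Shr and
+        // UShr are symmetric), with n the shift amount masked to [0, 63]:
+        //   o_h = (high << n) | (low >> (32 - n));  o_l = low << n
+        // When n >= 32, the `subs`/`it PL` pair overrides o_h with
+        // `low << (n - 32)`, and the final `low << n` yields 0 (ARM
+        // register-specified shifts of 32 or more produce 0).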
+        if (op->IsShl()) {
+          __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Shift the high part
+          __ Lsl(o_h, high, o_l);
+          // Shift the low part and `or` the bits that overflowed into the high part
+          __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ Lsr(temp, low, temp);
+          __ orr(o_h, o_h, ShifterOperand(temp));
+          // If the shift is > 32 bits, override the high part
+          __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsl(o_h, low, temp, PL);
+          // Shift the low part
+          __ Lsl(o_l, low, o_l);
+        } else if (op->IsShr()) {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Shift the low part
+          __ Lsr(o_l, low, o_h);
+          // Shift the high part and `or` the bits that underflowed into the low part
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          // If the shift is > 32 bits, override the low part
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Asr(o_l, high, temp, PL);
+          // Shift the high part
+          __ Asr(o_h, high, o_h);
+        } else {
+          __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+          // Same as Shr, except we use `Lsr`s and not `Asr`s.
+          __ Lsr(o_l, low, o_h);
+          __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ Lsl(temp, high, temp);
+          __ orr(o_l, o_l, ShifterOperand(temp));
+          __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+          __ it(PL);
+          __ Lsr(o_l, high, temp, PL);
+          __ Lsr(o_h, high, o_h);
+        }
       } else {
-        __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
-        // same as Shr except we use `Lsr`s and not `Asr`s
-        __ Lsr(o_l, low, o_h);
-        __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ Lsl(temp, high, temp);
-        __ orr(o_l, o_l, ShifterOperand(temp));
-        __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
-        __ it(PL);
-        __ Lsr(o_l, high, temp, PL);
-        __ Lsr(o_h, high, o_h);
+        // The register allocator doesn't create partial overlaps.
+        DCHECK_NE(o_l, high);
+        DCHECK_NE(o_h, low);
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = static_cast<uint32_t>(cst & kMaxLongShiftValue);
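+        // The constant cases below follow from this decomposition (sketch),
+        // with n == shift_value:
+        //   shl:  o_h = (high << n) | (low >> (32 - n));  o_l = low << n
+        //   shr:  o_l = (low >> n) | (high << (32 - n));  o_h = high asr n
+        //   ushr: like shr, but with a logical shift of the high part
+        // n == 32 and n > 32 get dedicated shorter sequences, n == 1 uses the
+        // carry flag (`Lsls`/`adc`, `Asrs`/`Rrx`), and n == 0 is assumed to
+        // have been simplified away earlier (see the DCHECK below).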
+        if (shift_value > 32) {
+          if (op->IsShl()) {
+            __ Lsl(o_h, low, shift_value - 32);
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ Asr(o_l, high, shift_value - 32);
+            __ Asr(o_h, high, 31);
+          } else {
+            __ Lsr(o_l, high, shift_value - 32);
+            __ LoadImmediate(o_h, 0);
+          }
+        } else if (shift_value == 32) {
+          if (op->IsShl()) {
+            __ mov(o_h, ShifterOperand(low));
+            __ LoadImmediate(o_l, 0);
+          } else if (op->IsShr()) {
+            __ mov(o_l, ShifterOperand(high));
+            __ Asr(o_h, high, 31);
+          } else {
+            __ mov(o_l, ShifterOperand(high));
+            __ LoadImmediate(o_h, 0);
+          }
+        } else if (shift_value == 1) {
+          if (op->IsShl()) {
+            __ Lsls(o_l, low, 1);
+            __ adc(o_h, high, ShifterOperand(high));
+          } else if (op->IsShr()) {
+            __ Asrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          } else {
+            __ Lsrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          }
+        } else {
+          DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+          if (op->IsShl()) {
+            __ Lsl(o_h, high, shift_value);
+            __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
+            __ Lsl(o_l, low, shift_value);
+          } else if (op->IsShr()) {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Asr(o_h, high, shift_value);
+          } else {
+            __ Lsr(o_l, low, shift_value);
+            __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+            __ Lsr(o_h, high, shift_value);
+          }
+        }
       }
       break;
     }
     default:
       LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
   }
 }
 
@@ -3118,20 +3456,19 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(R0));
 }
 
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3153,6 +3490,7 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -3334,6 +3672,9 @@
                                                          Register out_lo,
                                                          Register out_hi) {
   if (offset != 0) {
+    // Ensure `out_lo` is different from `addr`, so that loading
+    // `offset` into `out_lo` does not clobber `addr`.
+    DCHECK_NE(out_lo, addr);
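+    // Form the effective address in IP (the scratch register), leaving
+    // the incoming `addr` register itself unmodified.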
     __ LoadImmediate(out_lo, offset);
     __ add(IP, addr, ShifterOperand(out_lo));
     addr = IP;
@@ -3521,14 +3862,26 @@
 
 void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   bool volatile_for_double = field_info.IsVolatile()
       && (field_info.GetFieldType() == Primitive::kPrimDouble)
       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
-  bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+  // The output overlaps in the case of a volatile long: we don't want the
+  // code generated by GenerateWideAtomicLoad to overwrite the
+  // object's location.  Likewise, in the case of an object field get
+  // with read barriers enabled, we do not want the load to overwrite
+  // the object's location, as we need it to emit the read barrier.
+  bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+      object_field_get_with_read_barrier;
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
@@ -3594,7 +3947,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
@@ -3674,7 +4028,7 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
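+    // (When read barriers are disabled, this call is expected to reduce to
+    // unpoisoning the loaded reference when heap poisoning is enabled.)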
   }
 }
 
@@ -3818,20 +4172,31 @@
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type type = instruction->GetType();
 
@@ -3894,8 +4259,9 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
@@ -3958,8 +4324,17 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+    Location out = locations->Out();
+    if (index.IsConstant()) {
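+      // Note: TIMES_4 here is a log2 scale factor (its value is 2), so the
+      // shift below multiplies the constant index by the size of a heap
+      // reference (4 bytes).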
+      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+    }
   }
 }
 
@@ -3968,11 +4343,16 @@
 
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(value_type)) {
@@ -3980,7 +4360,6 @@
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
   }
-
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
@@ -3990,10 +4369,11 @@
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -4030,7 +4410,8 @@
 
     case Primitive::kPrimNot: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsRegister<Register>();
+      Location value_loc = locations->InAt(2);
+      Register value = value_loc.AsRegister<Register>();
       Register source = value;
 
       if (instruction->InputAt(2)->IsNullConstant()) {
@@ -4044,6 +4425,8 @@
           __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4056,7 +4439,7 @@
       Label done;
       SlowPathCode* slow_path = nullptr;
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4076,23 +4459,63 @@
           __ Bind(&non_zero);
         }
 
-        __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp1);
-        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
-        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
-        // No need to poison/unpoison, we're comparing two poisoined references.
-        __ cmp(temp1, ShifterOperand(temp2));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          Label do_put;
-          __ b(&do_put, EQ);
-          __ MaybeUnpoisonHeapReference(temp1);
-          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
-          // No need to poison/unpoison, we're comparing against null.
-          __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp1);
+          //   // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          //   __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+          //
+          //   __ cmp(temp1, ShifterOperand(temp2));
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ b(slow_path->GetEntryLabel());
         } else {
-          __ b(slow_path->GetEntryLabel(), NE);
+          // /* HeapReference<Class> */ temp1 = array->klass_
+          __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->component_type_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+          // If heap poisoning is enabled, no need to unpoison `temp1`
+          // or `temp2`, as we are comparing two poisoned references.
+          __ cmp(temp1, ShifterOperand(temp2));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            Label do_put;
+            __ b(&do_put, EQ);
+            // If heap poisoning is enabled, the `temp1` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp1);
+
+            // /* HeapReference<Class> */ temp1 = temp1->super_class_
+            __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp1`, as we are comparing against null below.
+            __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ b(slow_path->GetEntryLabel(), NE);
+          }
         }
       }
 
@@ -4116,7 +4539,7 @@
         __ StoreToOffset(kStoreWord, source, IP, data_offset);
       }
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4545,7 +4968,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(R0));
+      Location::RegisterLocation(R0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -4556,33 +4980,59 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
+
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ LoadFromOffset(
-        kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ AddConstant(out, current_method, declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+    }
   } else {
-    DCHECK(cls->CanCallRuntime());
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ LoadFromOffset(kLoadWord,
                       out,
                       current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
-    __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    // TODO: We will need a read barrier here.
 
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ AddConstant(out, out, cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
     } else {
-      __ Bind(slow_path->GetExitLabel());
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+    }
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -4628,13 +5078,35 @@
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ LoadFromOffset(
-      kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ AddConstant(out, current_method, declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
-  __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
-  // TODO: We will need a read barrier here.
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ AddConstant(out, out, cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+  }
+
   __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4673,45 +5145,50 @@
 void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
   codegen_->InvokeRuntime(
       QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(R0));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4725,15 +5202,9 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ LoadFromOffset(kLoadWord, target, obj, class_offset);
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -4744,13 +5215,23 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following load operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -4761,14 +5242,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following load operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ LoadFromOffset(kLoadWord, out, out, super_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -4779,14 +5270,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following load operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ LoadFromOffset(kLoadWord, out, out, component_offset);
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -4797,11 +5298,12 @@
       __ b(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel(), NE);
       __ LoadImmediate(out, 1);
@@ -4810,13 +5312,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as that would
+      // require assigning fixed registers to the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), and those registers might be clobbered by the
+      // read barrier potentially emitted at the beginning of this
+      // method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ b(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ b(&done);
       }
@@ -4842,57 +5356,61 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register temp = locations->WillCall()
-      ? Register(kNoRegister)
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   Label done;
   // Avoid null check if we know obj is not null.
@@ -4900,76 +5418,159 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ LoadFromOffset(kLoadWord, obj, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ b(slow_path->GetEntryLabel(), NE);
+      __ b(type_check_slow_path->GetEntryLabel(), NE);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop;
+      Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ CompareAndBranchIfNonZero(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ CompareAndBranchIfNonZero(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ b(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
-      __ MaybeUnpoisonHeapReference(temp);
-      __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
-      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ CompareAndBranchIfZero(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require us to
+      // assign fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), and
+      // those registers might be clobbered by the read barrier
+      // potentially emitted at the beginning of this method.
+      __ b(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -4985,6 +5586,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
@@ -5143,19 +5749,85 @@
   }
 }
 
+void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  // Currently the read barrier call is inserted after the original load.
+  // However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+  // original load. This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   bool isGray = obj.LockWord & kReadBarrierMask;
+  //   lfence;  // load fence or artificial data dependence to prevent load-load reordering
+  //   ref = obj.field;    // this is the original load
+  //   if (isGray) {
+  //     ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+  //   }
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                  Location out,
+                                                  Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ b(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5178,6 +5850,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5200,7 +5898,7 @@
       __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -5209,13 +5907,17 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
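+      // `base_reg` points `base->GetElementOffset()` bytes into the dex cache
+      // arrays, so the subtraction above turns the method's absolute dex cache
+      // array offset into a displacement relative to `base_reg`.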
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -5226,10 +5928,11 @@
         method_reg = reg;
         __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
       }
-      // temp = current_method->dex_cache_resolved_methods_;
-      __ LoadFromOffset(
-          kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset(
-              kArmPointerSize).Int32Value());
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+      __ LoadFromOffset(kLoadWord,
+                        reg,
+                        method_reg,
+                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
       // temp = temp[index_in_cache]
       uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
       __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
@@ -5270,13 +5973,24 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though the
+  // collector may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -5290,7 +6004,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5316,6 +6034,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -5427,6 +6167,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index cef1095..193add2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -228,15 +228,13 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
+                             Label* false_target);
   void GenerateCompareWithImmediate(Register left, int32_t right);
-  void GenerateCompareTestAndBranch(HIf* if_instr,
-                                    HCondition* condition,
+  void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
+                                    Label* false_target);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -375,8 +373,83 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
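+  //
+  // A rough sketch of the emitted sequence (the zero immediates are placeholders
+  // that the linker later fixes up; see EmitLinkerPatches):
+  //
+  //   movw base_reg, #0        // patched with the low 16 bits of the offset
+  //   movt base_reg, #0        // patched with the high 16 bits of the offset
+  //   add  base_reg, base_reg, pc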
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
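+  //
+  // A typical use (cf. the type checks in code_generator_arm.cc) first loads the
+  // reference and then emits the barrier for it:
+  //
+  //   // /* HeapReference<Class> */ out = out->component_type_
+  //   __ LoadFromOffset(kLoadWord, out, out, component_offset);
+  //   codegen_->MaybeGenerateReadBarrier(
+  //       instruction, out_loc, out_loc, temp_loc, component_offset);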
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -398,6 +471,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b0be446..04acd9d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -42,6 +42,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm64 {
 
 using helpers::CPURegisterFrom;
@@ -68,6 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
+// The compare/jump sequence will generate about (2 * num_entries + 1) instructions, while the
+// jump table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence generates less code/data for a small num_entries.
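+// For example, at the threshold of 6 entries the compare/jump sequence costs roughly
+// 2 * 6 + 1 = 13 instructions, versus 7 instructions plus 6 32-bit literals for the
+// jump table.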
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -427,15 +434,6 @@
 
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = InputRegisterAt(instruction_, 0);
-      Register temp = WRegisterFrom(locations->GetTemp(0));
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -450,11 +448,11 @@
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(
           QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+                           const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
-                           const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -490,6 +488,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -545,6 +544,293 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
 };
 
+void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
+  uint32_t num_entries = switch_instr_->GetNumEntries();
+  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+
+  // We are about to use the assembler to place literals directly. Make sure we have enough
+  // space in the underlying code buffer and that we generate the jump table with the right size.
+  CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
+                             CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);
+
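+  // Each entry is the signed 32-bit offset from the start of the table to the
+  // corresponding successor block, placed directly into the code stream as a literal.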
+  __ Bind(&table_start_);
+  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+    DCHECK(target_label->IsBound());
+    ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+    Literal<int32_t> literal(jump_offset);
+    __ place(&literal);
+  }
+}
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ Ldr(out, HeapOperand(out, class_offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+
+    // Note: In the case of a HArrayGet instruction, when the base
+    // address is a HArm64IntermediateAddress instruction, it does not
+    // point to the array object itself, but to an offset within this
+    // object. However, the read barrier entry point needs the array
+    // object address to be passed as its first argument. So we
+    // temporarily set `obj_` back to that address, and restore its
+    // initial value later.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Sub(obj_reg, obj_reg, offset_);
+    }
+
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
+        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to vixl::MacroAssembler::Lsl and
+          // vixl::MacroAssembler::Mov below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg.W(), index_reg);
+          index_reg = free_reg;
+          index = LocationFrom(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
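+        // (With 4-byte heap references, 2^26 elements occupy at most 2^28 bytes,
+        // which comfortably fits in the 32-bit index register.)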
+        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Add(index_reg, index_reg, Operand(offset_));
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair, the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          LocationFrom(calling_convention.GetRegisterAt(0)),
+                          type,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          LocationFrom(calling_convention.GetRegisterAt(1)),
+                          type,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            LocationFrom(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
+    }
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+
+    // Restore the value of `obj_` when it corresponds to a
+    // HArm64IntermediateAddress instruction.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Add(obj_reg, obj_reg, offset_);
+    }
+
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(XRegisterFrom(ref_).code());
+    size_t obj = static_cast<size_t>(XRegisterFrom(obj_).code());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return Register(VIXLRegCodeFromART(i), kXRegSize);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    // The argument of the ReadBarrierForRootSlow is not a managed
+    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
+    // thus we need a 64-bit move here, and we cannot use
+    //
+    //   arm64_codegen->MoveLocation(
+    //       LocationFrom(calling_convention.GetRegisterAt(0)),
+    //       root_,
+    //       type);
+    //
+    // which would emit a 32-bit move, as `type` is a (32-bit wide)
+    // reference type (`Primitive::kPrimNot`).
+    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -587,6 +873,7 @@
                     compiler_options,
                     stats),
       block_labels_(nullptr),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -598,15 +885,21 @@
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
 
-#undef __
 #define __ GetVIXLAssembler()->
 
+void CodeGeneratorARM64::EmitJumpTables() {
+  for (auto jump_table : jump_tables_) {
+    jump_table->EmitTable(this);
+  }
+}
+
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+  EmitJumpTables();
   // Ensure we emit the literal pool.
   __ FinalizeCode();
 
@@ -1368,13 +1661,25 @@
 }
 
 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the load to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
@@ -1403,7 +1708,11 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+    LocationSummary* locations = instruction->GetLocations();
+    Location base = locations->InAt(0);
+    Location out = locations->Out();
+    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
   }
 }
 
@@ -1580,6 +1889,82 @@
   HandleBinaryOp(instruction);
 }
 
+void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  if (instruction->GetInstrKind() == HInstruction::kNeg) {
+    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  Primitive::Type type = instruction->GetType();
+  HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  Register out = OutputRegister(instruction);
+  Register left;
+  if (kind != HInstruction::kNeg) {
+    left = InputRegisterAt(instruction, 0);
+  }
+  // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion
+  // into the shifter operand, the IR generating `right_reg` (the input to the type
+  // conversion) can have a different type from the current instruction's type,
+  // so we manually indicate the type.
+  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
+  int64_t shift_amount = (type == Primitive::kPrimInt)
+      ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue)
+      : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue);
+
+  Operand right_operand(0);
+
+  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
+  } else {
+    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+  }
+
+  // Logical binary operations do not support extension operations in the
+  // operand. Note that VIXL would still manage if it was passed by generating
+  // the extension as a separate instruction.
+  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
+  DCHECK(!right_operand.IsExtendedRegister() ||
+         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
+          kind != HInstruction::kNeg));
+  switch (kind) {
+    case HInstruction::kAdd:
+      __ Add(out, left, right_operand);
+      break;
+    case HInstruction::kAnd:
+      __ And(out, left, right_operand);
+      break;
+    case HInstruction::kNeg:
+      DCHECK(instruction->InputAt(0)->AsConstant()->IsZero());
+      __ Neg(out, right_operand);
+      break;
+    case HInstruction::kOr:
+      __ Orr(out, left, right_operand);
+      break;
+    case HInstruction::kSub:
+      __ Sub(out, left, right_operand);
+      break;
+    case HInstruction::kXor:
+      __ Eor(out, left, right_operand);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation kind: " << kind;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1595,23 +1980,75 @@
          Operand(InputOperandAt(instruction, 1)));
 }
 
-void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+  Register res = OutputRegister(instr);
+  Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
+  Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
+  Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+
+  // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+  // This fixup should be carried out for all multiply-accumulate instructions:
+  // madd, msub, smaddl, smsubl, umaddl and umsubl.
+  if (instr->GetType() == Primitive::kPrimLong &&
+      codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
+    MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
+    vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+    if (prev->IsLoadOrStore()) {
+      // Make sure we emit exactly one nop.
+      vixl::CodeBufferCheckScope scope(masm,
+                                       vixl::kInstructionSize,
+                                       vixl::CodeBufferCheckScope::kCheck,
+                                       vixl::CodeBufferCheckScope::kExactSize);
+      __ nop();
+    }
+  }
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ Madd(res, mul_left, mul_right, accumulator);
+  } else {
+    DCHECK(instr->GetOpKind() == HInstruction::kSub);
+    __ Msub(res, mul_left, mul_right, accumulator);
+  }
+}
+
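
A sketch of the instruction sequences involved in the erratum workaround (registers hypothetical):

    // Affected Cortex-A53 cores can mis-execute a 64-bit multiply-accumulate
    // that directly follows a memory access (erratum 835769):
    //
    //   ldr  x1, [x2]           // load or store
    //   madd x0, x3, x4, x5     // 64-bit multiply-accumulate
    //
    // The check above inspects the previously emitted instruction and, if it
    // is a load or store, separates the two with a single nop:
    //
    //   ldr  x1, [x2]
    //   nop
    //   madd x0, x3, x4, x5
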
+void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Location index = instruction->GetLocations()->InAt(1);
-  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
+  LocationSummary* locations = instruction->GetLocations();
+  Location index = locations->InAt(1);
+  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   MemOperand source = HeapOperand(obj);
   CPURegister dest = OutputCPURegister(instruction);
 
@@ -1643,8 +2080,22 @@
   codegen_->Load(type, dest, source);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
-  if (instruction->GetType() == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
+  if (type == Primitive::kPrimNot) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    Location obj_loc = locations->InAt(0);
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
+      // not contain the base address of the array object, which is
+      // needed by the read barrier entry point. So the read barrier
+      // slow path will temporarily set back `obj_loc` to the right
+      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+    }
   }
 }
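
For reference, a sketch of how the element address is formed in the two index cases, assuming an `int[]` (4-byte components, hence a data offset of 12 with 32-bit heap references; registers hypothetical):

    //   constant index i:   ldr w0, [x1, #(12 + (i << 2))]
    //
    //   register index:     add x16, x1, #12
    //                       ldr w0, [x16, x2, lsl #2]
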
 
@@ -1662,12 +2113,19 @@
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+  if (Primitive::IsFloatingPointType(value_type)) {
     locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1677,7 +2135,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -1691,7 +2149,7 @@
   BlockPoolsScope block_pools(masm);
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call);
+    DCHECK(!may_need_runtime_call_for_type_check);
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
       destination = HeapOperand(array, offset);
@@ -1741,7 +2199,7 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -1756,26 +2214,66 @@
           __ Bind(&non_zero);
         }
 
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        __ Ldr(temp, HeapOperand(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GetAssembler()->MaybeUnpoisonHeapReference(temp);
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::Label do_put;
-          __ B(eq, &do_put);
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // No need to unpoison, we're comparing against null.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ Ldr(temp, HeapOperand(temp, component_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+          //
+          //   __ Cmp(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ B(slow_path->GetEntryLabel());
         } else {
-          __ B(ne, slow_path->GetEntryLabel());
+          Register temp2 = temps.AcquireSameSizeAs(array);
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ Ldr(temp, HeapOperand(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ Ldr(temp, HeapOperand(temp, component_offset));
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ Cmp(temp, temp2);
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            vixl::Label do_put;
+            __ B(eq, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ Ldr(temp, HeapOperand(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ Cbnz(temp, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ B(ne, slow_path->GetEntryLabel());
+          }
+          temps.Release(temp2);
         }
-        temps.Release(temp2);
       }
 
       if (kPoisonHeapReferences) {
@@ -1791,7 +2289,7 @@
       }
       __ Str(source, destination);
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
     }
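
A concrete example (hypothetical Java code) of why this store may need a runtime type check:

    // Object[] arr = new Integer[1];
    // arr[0] = "string";   // must throw ArrayStoreException
    //
    // The static type of `arr` does not pin down the component type, so the
    // generated code compares value->klass_ against arr->klass_->component_type_
    // (for arrays statically typed Object[], a component type whose super class
    // is null, i.e. Object itself, is also accepted) and falls back to the slow
    // path on mismatch.
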
@@ -2283,38 +2781,56 @@
 }
 
 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
+                                                          size_t condition_input_index,
                                                           vixl::Label* true_target,
-                                                          vixl::Label* false_target,
-                                                          vixl::Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
+                                                          vixl::Label* false_target) {
+  // FP branching requires both targets to be explicit. If either of the targets
+  // is nullptr (i.e. a fallthrough), use and bind `fallthrough_target` instead.
+  vixl::Label fallthrough_target;
+  HInstruction* cond = instruction->InputAt(condition_input_index);
 
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against 1.
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ B(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ B(false_target);
+      }
     }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
     // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-    __ Cbnz(InputRegisterAt(instruction, 0), true_target);
+    if (true_target == nullptr) {
+      __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+    } else {
+      __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+    }
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
-    Primitive::Type type =
-        cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
+    HCondition* condition = cond->AsCondition();
 
+    Primitive::Type type = condition->InputAt(0)->GetType();
     if (Primitive::IsFloatingPointType(type)) {
-      // FP compares don't like null false_targets.
-      if (false_target == nullptr) {
-        false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor());
-      }
       FPRegister lhs = InputFPRegisterAt(condition, 0);
       if (condition->GetLocations()->InAt(1).IsConstant()) {
         DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant()));
@@ -2324,31 +2840,45 @@
         __ Fcmp(lhs, InputFPRegisterAt(condition, 1));
       }
       if (condition->IsFPConditionTrueIfNaN()) {
-        __ B(vs, true_target);  // VS for unordered.
+        __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
       } else if (condition->IsFPConditionFalseIfNaN()) {
-        __ B(vs, false_target);  // VS for unordered.
+        __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
       }
-      __ B(ARM64Condition(condition->GetCondition()), true_target);
+      if (true_target == nullptr) {
+        __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+      } else {
+        __ B(ARM64Condition(condition->GetCondition()), true_target);
+      }
     } else {
       // Integer cases.
       Register lhs = InputRegisterAt(condition, 0);
       Operand rhs = InputOperandAt(condition, 1);
-      Condition arm64_cond = ARM64Condition(condition->GetCondition());
+
+      Condition arm64_cond;
+      vixl::Label* non_fallthrough_target;
+      if (true_target == nullptr) {
+        arm64_cond = ARM64Condition(condition->GetOppositeCondition());
+        non_fallthrough_target = false_target;
+      } else {
+        arm64_cond = ARM64Condition(condition->GetCondition());
+        non_fallthrough_target = true_target;
+      }
+
       if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
         switch (arm64_cond) {
           case eq:
-            __ Cbz(lhs, true_target);
+            __ Cbz(lhs, non_fallthrough_target);
             break;
           case ne:
-            __ Cbnz(lhs, true_target);
+            __ Cbnz(lhs, non_fallthrough_target);
             break;
           case lt:
             // Test the sign bit and branch accordingly.
-            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+            __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
             break;
           case ge:
             // Test the sign bit and branch accordingly.
-            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+            __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
             break;
           default:
             // Without the `static_cast` the compiler throws an error for
@@ -2357,43 +2887,43 @@
         }
       } else {
         __ Cmp(lhs, rhs);
-        __ B(arm64_cond, true_target);
+        __ B(arm64_cond, non_fallthrough_target);
       }
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ B(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
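
The three emission shapes enumerated in the comment above, sketched for a materialized condition held in a hypothetical w0:

    //   (1) only false_target:   cbz  w0, false_target
    //   (2) only true_target:    cbnz w0, true_target
    //   (3) both targets:        cbnz w0, true_target
    //                            b    false_target
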
 
 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
-  vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  vixl::Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
@@ -2402,8 +2932,10 @@
   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathARM64(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  vixl::Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -2424,40 +2956,44 @@
 
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
@@ -2473,15 +3009,9 @@
     __ Cbz(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ Ldr(target, HeapOperand(obj.W(), class_offset));
-  GetAssembler()->MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ Ldr(out, HeapOperand(obj.W(), class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -2492,13 +3022,23 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -2509,14 +3049,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -2527,14 +3077,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       vixl::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ Ldr(out, HeapOperand(out, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -2545,11 +3105,12 @@
       __ B(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ B(ne, slow_path->GetEntryLabel());
       __ Mov(out, 1);
@@ -2558,13 +3119,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ B(&done);
       }
@@ -2590,58 +3163,62 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
-  Register temp;
-  if (!locations->WillCall()) {
-    temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
-  }
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCodeARM64* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARM64* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                          is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   vixl::Label done;
   // Avoid null check if we know obj is not null.
@@ -2649,76 +3226,159 @@
     __ Cbz(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ Ldr(obj, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ Ldr(temp, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ Ldr(temp, HeapOperand(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ B(ne, slow_path->GetEntryLabel());
+      __ B(ne, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop;
+      vixl::Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ Cbnz(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ B(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      vixl::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ Ldr(temp, HeapOperand(temp, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ Cbnz(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Cbz(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
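
Roughly how the TypeCheckKind cases above arise from Java-level casts (the classification itself happens earlier, when the graph is built; the class names are only examples):

    //   (String) o        -> kExactCheck           (final class)
    //   (AbstractList) o  -> kAbstractClassCheck   (abstract class)
    //   (ArrayList) o     -> kClassHierarchyCheck  (walk the super classes)
    //   (Object[]) o      -> kArrayObjectCheck     (non-primitive array)
    //   (List) o          -> kInterfaceCheck       (always the slow path here)
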
 
 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
@@ -2761,10 +3421,11 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = XRegisterFrom(locations->GetTemp(0));
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
-  Location receiver = invoke->GetLocations()->InAt(0);
+  Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
 
@@ -2776,14 +3437,22 @@
   scratch_scope.Exclude(ip1);
   __ Mov(ip1, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ Ldr(temp.W(), StackOperandFrom(receiver));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -2856,41 +3525,44 @@
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
       // temp = thread->string_init_entrypoint
-      __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset()));
+      __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
-      __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+      __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
       // Load method address from literal pool with a link-time patch.
-      __ Ldr(XRegisterFrom(temp).X(),
+      __ Ldr(XRegisterFrom(temp),
              DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       // Add ADRP with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
-      vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label;
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                                  invoke->GetDexCacheArrayOffset());
+      vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label;
       {
         vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
-        __ adrp(XRegisterFrom(temp).X(), 0);
+        __ Bind(pc_insn_label);
+        __ adrp(XRegisterFrom(temp), 0);
       }
-      __ Bind(pc_insn_label);  // Bind after ADRP.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
       // Add LDR with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
-      __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0));
-      __ Bind(&pc_rel_dex_cache_patches_.back().label);  // Bind after LDR.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                                  invoke->GetDexCacheArrayOffset());
+      {
+        vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+        __ Bind(&pc_relative_dex_cache_patches_.back().label);
+        __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0));
+        pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      }
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = XRegisterFrom(temp);
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -2902,7 +3574,7 @@
         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
       }
 
-      // temp = current_method->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ Ldr(reg.X(),
              MemOperand(method_reg.X(),
                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
@@ -2920,8 +3592,9 @@
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
       relative_call_patches_.emplace_back(invoke->GetTargetMethod());
       vixl::Label* label = &relative_call_patches_.back().label;
-      __ Bl(label);  // Arbitrarily branch to the instruction after BL, override at link time.
-      __ Bind(label);  // Bind after BL.
+      vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+      __ Bind(label);
+      __ bl(0);  // Branch and link to itself. This will be overridden at link time.
       break;
     }
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
@@ -2934,7 +3607,7 @@
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // LR = callee_method->entry_point_from_quick_compiled_code_;
       __ Ldr(lr, MemOperand(
-          XRegisterFrom(callee_method).X(),
+          XRegisterFrom(callee_method),
           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
       // lr()
       __ Blr(lr);
@@ -2945,8 +3618,12 @@
 }
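
A sketch of the two link-time-patched sequences emitted above; both are recorded with their labels and resolved later via EmitLinkerPatches (further down in this file):

    //   PC-relative DexCache access (immediates patched at link time):
    //     adrp x0, #0         // -> page address of the dex cache element
    //     ldr  x0, [x0, #0]   // -> page offset of the dex cache element
    //
    //   Relative call (target patched at link time):
    //     bl   .              // branch-and-link to itself until patched
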
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -2956,8 +3633,15 @@
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   DCHECK(receiver.IsRegister());
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -2973,7 +3657,7 @@
       method_patches_.size() +
       call_patches_.size() +
       relative_call_patches_.size() +
-      pc_rel_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -2990,14 +3674,14 @@
                                                      target_method.dex_method_index));
   }
   for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
-    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u,
+    linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u,
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
                                                               &info.target_dex_file,
-                                                              info.pc_insn_label->location() - 4u,
+                                                              info.pc_insn_label->location(),
                                                               info.element_offset));
   }
 }
@@ -3070,7 +3754,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0));
+      LocationFrom(vixl::x0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3080,30 +3765,56 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
+  Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
   Register current_method = InputRegisterAt(cls, 0);
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
-    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
-
-    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Cbz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ Add(out.X(), current_method.X(), declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
     } else {
-      __ Bind(slow_path->GetExitLabel());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+    }
+  } else {
+    MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+    __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ Add(out.X(), out.X(), cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ Ldr(out, MemOperand(out.X(), cache_offset));
+    }
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Cbz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
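
A sketch of the two shapes generated above for a GcRoot load, here for the declaring class (offset symbolic, registers hypothetical):

    //   Without read barriers:
    //     ldr w0, [x1, #declaring_class_offset]   // w0 = root value
    //
    //   With read barriers:
    //     add x0, x1, #declaring_class_offset     // x0 = address of the root
    //     // GenerateReadBarrierForRoot(...) then loads through that address
    //     // and yields the (possibly to-space) reference it designates.
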
@@ -3149,12 +3860,35 @@
   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
   codegen_->AddSlowPath(slow_path);
 
+  Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
-  __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ Add(out.X(), current_method.X(), declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ Add(out.X(), out.X(), cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ Ldr(out, MemOperand(out.X(), cache_offset));
+  }
+
   __ Cbz(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -3189,7 +3923,11 @@
       instruction,
       instruction->GetDexPc(),
       nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -3278,8 +4016,6 @@
   locations->SetOut(LocationFrom(x0));
   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
-  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
-                       void*, uint32_t, int32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -3301,17 +4037,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
-  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 }
 
 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
-  DCHECK(type_index.Is(w0));
-  __ Mov(type_index, instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
@@ -3488,6 +4219,11 @@
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
 
@@ -3732,9 +4468,7 @@
     int min_size = std::min(result_size, input_size);
     Register output = OutputRegister(conversion);
     Register source = InputRegisterAt(conversion, 0);
-    if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
-      __ Ubfx(output, source, 0, result_size * kBitsPerByte);
-    } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
       // 'int' values are used directly as W registers, discarding the top
       // bits, so we don't need to sign-extend and can just perform a move.
       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
@@ -3743,9 +4477,11 @@
       // 32bit input value as a 64bit value assuming that the top 32 bits are
       // zero.
       __ Mov(output.W(), source.W());
-    } else if ((result_type == Primitive::kPrimChar) ||
-               ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      __ Ubfx(output,
+              output.IsX() ? source.X() : source.W(),
+              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
     } else {
       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
     }
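
The Ubfx/Sbfx selection above boils down to one rule: char is the only unsigned integral type, so any conversion producing a char (or widening from one) zero-extends 16 bits, while every other narrowing conversion sign-extends. A minimal sketch of the rule, assuming the usual arithmetic right shift on signed values:

    #include <cstdint>

    int64_t ConvertIntegral(int64_t source, int result_bits, bool result_is_char) {
      if (result_is_char) {
        return source & 0xffff;  // Ubfx: zero-extend the 16-bit char
      }
      // Sbfx: sign-extend the low result_bits bits.
      int shift = 64 - result_bits;
      return static_cast<int64_t>(static_cast<uint64_t>(source) << shift) >> shift;
    }
    // ConvertIntegral(0xfff0, 16, true) == 0xfff0 (char keeps all 16 bits), but
    // ConvertIntegral(0xf0, 8, false) == -16 (byte sign-extends bit 7).
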
@@ -3810,29 +4546,152 @@
 
 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   Register value_reg = InputRegisterAt(switch_instr, 0);
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Cbz(value_reg, succ);
-    } else {
-      __ Cmp(value_reg, vixl::Operand(case_value));
-      __ B(eq, succ);
-    }
-  }
+  // Roughly set 16 as the maximum average number of instructions generated per HIR in a graph.
+  static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+  // ADR has a limited range (+/-1MB), so we set a threshold on the number of HIRs in the graph to
+  // make sure we don't emit it when the target may be out of range.
+  // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
+  // ranges and emit the tables only as required.
+  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+  if (num_entries < kPackedSwitchJumpTableThreshold ||
+      // The current instruction id is an upper bound on the number of HIRs in the graph.
+      GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
+    // Create a series of compare/jumps.
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    for (uint32_t i = 0; i < num_entries; i++) {
+      int32_t case_value = lower_bound + i;
+      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
+      if (case_value == 0) {
+        __ Cbz(value_reg, succ);
+      } else {
+        __ Cmp(value_reg, Operand(case_value));
+        __ B(eq, succ);
+      }
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr);
+    codegen_->AddJumpTable(jump_table);
+
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+
+    // The instructions below should use at most one blocked register. Since there are two
+    // blocked registers, we are free to block one of them here.
+    Register temp_w = temps.AcquireW();
+    Register index;
+    // Remove the bias.
+    if (lower_bound != 0) {
+      index = temp_w;
+      __ Sub(index, value_reg, Operand(lower_bound));
+    } else {
+      index = value_reg;
+    }
+
+    // Jump to the default block if the index is out of range.
+    __ Cmp(index, Operand(num_entries));
+    __ B(hs, codegen_->GetLabelOf(default_block));
+
+    // In the current VIXL implementation, encoding the immediate value for Adr does not require
+    // any blocked registers, so we are free to use both VIXL blocked registers here to reduce
+    // register pressure.
+    Register table_base = temps.AcquireX();
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    Register jump_offset = temp_w;
+    __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
+
+    // Jump to the target block by branching to table_base (PC-relative) + offset.
+    Register target_address = table_base;
+    __ Add(target_address, table_base, Operand(jump_offset, SXTW));
+    __ Br(target_address);
   }
 }
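
To make the new threshold concrete: with vixl::kInstructionSize == 4, kJumpTableInstructionThreshold works out to 1 MB / 64 B = 16384 HIR instructions, a conservative bound under which every ADR emitted before the end-of-code tables stays within +/-1MB of its table. The table dispatch itself behaves like this hedged C++ sketch (illustrative model, not ART code):

    #include <cstdint>

    // table[i] holds the signed offset of case i's target, relative to the
    // start of the table (which Adr materializes as table_base).
    intptr_t Dispatch(int32_t value, int32_t lower_bound, uint32_t num_entries,
                      const int32_t* table, intptr_t table_base,
                      intptr_t default_target) {
      uint32_t index = static_cast<uint32_t>(value - lower_bound);  // Sub (bias removal)
      if (index >= num_entries) {                                   // Cmp + B.hs
        return default_target;
      }
      int32_t jump_offset = table[index];  // Ldr [table_base, index, UXTW #2]
      return table_base + jump_offset;     // Add (SXTW) + Br
    }
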
 
+void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Location ref,
+                                             Location obj,
+                                             uint32_t offset,
+                                             Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
+  }
+}
+
+void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                    Location out,
+                                                    Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
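
The two TODOs above describe the eventual Baker-style fast paths; until then, every barrier branches straight to its slow path. A hedged sketch of both fast paths, with illustrative types and an assumed kReadBarrierMask bit position (the real lock-word layout lives in the runtime):

    #include <atomic>
    #include <cstdint>

    struct Obj { std::atomic<uint32_t> lock_word; Obj* field; };
    constexpr uint32_t kReadBarrierMask = 1u << 28;  // assumed bit, for illustration

    Obj* Mark(Obj* ref);  // assumed runtime slow path

    // Heap-reference barrier: the acquire load orders the lock-word read before
    // the reference load (the "lfence / data dependence" note above).
    Obj* ReadField(Obj* obj) {
      bool is_gray =
          (obj->lock_word.load(std::memory_order_acquire) & kReadBarrierMask) != 0;
      Obj* ref = obj->field;  // the original load
      return is_gray ? Mark(ref) : ref;
    }

    // GC-root barrier: gated on a thread-local flag instead of the lock word.
    extern thread_local bool tls_is_gc_marking;  // stand-in for tls32_.is_gc_marking
    Obj* ReadRoot(Obj** root) {
      Obj* ref = *root;
      return tls_is_gc_marking ? Mark(ref) : ref;
    }
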
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ab684ea..7950f07 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -81,6 +81,22 @@
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
 };
 
+class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> {
+ public:
+  explicit JumpTableARM64(HPackedSwitch* switch_instr)
+    : switch_instr_(switch_instr), table_start_() {}
+
+  vixl::Label* GetTableStartLabel() { return &table_start_; }
+
+  void EmitTable(CodeGeneratorARM64* codegen);
+
+ private:
+  HPackedSwitch* const switch_instr_;
+  vixl::Label table_start_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
+};
+
 static const vixl::Register kRuntimeParameterCoreRegisters[] =
     { vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
@@ -203,9 +219,9 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              vixl::Label* true_target,
-                             vixl::Label* false_target,
-                             vixl::Label* always_true_target);
+                             vixl::Label* false_target);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -358,6 +374,10 @@
     block_labels_ = CommonInitializeLabels<vixl::Label>();
   }
 
+  void AddJumpTable(JumpTableARM64* jump_table) {
+    jump_tables_.push_back(jump_table);
+  }
+
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   // Code generation helpers.
@@ -404,6 +424,51 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
  private:
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
@@ -422,15 +487,16 @@
 
     const DexFile& target_dex_file;
     uint32_t element_offset;
-    // NOTE: Labels are bound to the end of the patched instruction because
-    // we don't know if there will be a veneer or how big it will be.
     vixl::Label label;
     vixl::Label* pc_insn_label;
   };
 
+  void EmitJumpTables();
+
   // Labels for each block that will be compiled.
   vixl::Label* block_labels_;  // Indexed by block id.
   vixl::Label frame_entry_label_;
+  ArenaVector<JumpTableARM64*> jump_tables_;
 
   LocationsBuilderARM64 location_builder_;
   InstructionCodeGeneratorARM64 instruction_visitor_;
@@ -447,7 +513,7 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 29d08be..9dc9167 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,12 +40,8 @@
 static constexpr Register kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr Register TMP = T8;
 static constexpr FRegister FTMP = F8;
 
-// ART Thread Register.
-static constexpr Register TR = S1;
-
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -419,13 +415,11 @@
                                   dex_pc,
                                   this,
                                   IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -465,6 +459,7 @@
                                 dex_pc,
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -1732,12 +1727,11 @@
 }
 
 void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
-  Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
+  bool needs_runtime_call = instruction->NeedsTypeCheck();
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2425,30 +2419,51 @@
 }
 
 void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
+                                                         size_t condition_input_index,
                                                          MipsLabel* true_target,
-                                                         MipsLabel* false_target,
-                                                         MipsLabel* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
+                                                         MipsLabel* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
 
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against 1.
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ B(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ B(false_target);
+      }
     }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
     // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-    __ Bnez(cond_val.AsRegister<Register>(), true_target);
+    if (true_target == nullptr) {
+      __ Beqz(cond_val.AsRegister<Register>(), false_target);
+    } else {
+      __ Bnez(cond_val.AsRegister<Register>(), true_target);
+    }
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
     Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
     Location rhs_location = condition->GetLocations()->InAt(1);
     Register rhs_reg = ZERO;
@@ -2460,37 +2475,46 @@
       rhs_reg = rhs_location.AsRegister<Register>();
     }
 
-    IfCondition if_cond = condition->GetCondition();
+    IfCondition if_cond;
+    MipsLabel* non_fallthrough_target;
+    if (true_target == nullptr) {
+      if_cond = condition->GetOppositeCondition();
+      non_fallthrough_target = false_target;
+    } else {
+      if_cond = condition->GetCondition();
+      non_fallthrough_target = true_target;
+    }
+
     if (use_imm && rhs_imm == 0) {
       switch (if_cond) {
         case kCondEQ:
-          __ Beqz(lhs, true_target);
+          __ Beqz(lhs, non_fallthrough_target);
           break;
         case kCondNE:
-          __ Bnez(lhs, true_target);
+          __ Bnez(lhs, non_fallthrough_target);
           break;
         case kCondLT:
-          __ Bltz(lhs, true_target);
+          __ Bltz(lhs, non_fallthrough_target);
           break;
         case kCondGE:
-          __ Bgez(lhs, true_target);
+          __ Bgez(lhs, non_fallthrough_target);
           break;
         case kCondLE:
-          __ Blez(lhs, true_target);
+          __ Blez(lhs, non_fallthrough_target);
           break;
         case kCondGT:
-          __ Bgtz(lhs, true_target);
+          __ Bgtz(lhs, non_fallthrough_target);
           break;
         case kCondB:
           break;  // always false
         case kCondBE:
-          __ Beqz(lhs, true_target);  // <= 0 if zero
+          __ Beqz(lhs, non_fallthrough_target);  // <= 0 if zero
           break;
         case kCondA:
-          __ Bnez(lhs, true_target);  // > 0 if non-zero
+          __ Bnez(lhs, non_fallthrough_target);  // > 0 if non-zero
           break;
         case kCondAE:
-          __ B(true_target);  // always true
+          __ B(non_fallthrough_target);  // always true
           break;
       }
     } else {
@@ -2501,81 +2525,78 @@
       }
       switch (if_cond) {
         case kCondEQ:
-          __ Beq(lhs, rhs_reg, true_target);
+          __ Beq(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondNE:
-          __ Bne(lhs, rhs_reg, true_target);
+          __ Bne(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondLT:
-          __ Blt(lhs, rhs_reg, true_target);
+          __ Blt(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondGE:
-          __ Bge(lhs, rhs_reg, true_target);
+          __ Bge(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondLE:
-          __ Bge(rhs_reg, lhs, true_target);
+          __ Bge(rhs_reg, lhs, non_fallthrough_target);
           break;
         case kCondGT:
-          __ Blt(rhs_reg, lhs, true_target);
+          __ Blt(rhs_reg, lhs, non_fallthrough_target);
           break;
         case kCondB:
-          __ Bltu(lhs, rhs_reg, true_target);
+          __ Bltu(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondAE:
-          __ Bgeu(lhs, rhs_reg, true_target);
+          __ Bgeu(lhs, rhs_reg, non_fallthrough_target);
           break;
         case kCondBE:
-          __ Bgeu(rhs_reg, lhs, true_target);
+          __ Bgeu(rhs_reg, lhs, non_fallthrough_target);
           break;
         case kCondA:
-          __ Bltu(rhs_reg, lhs, true_target);
+          __ Bltu(rhs_reg, lhs, non_fallthrough_target);
           break;
       }
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ B(false_target);
   }
 }
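
Since this rewrite recurs across the backends touched by this change, the contract is worth spelling out: a null target means that successor is the fall-through block and needs no branch. A minimal sketch of the three cases, with Branch/Jump as assumed emit primitives:

    struct Label;
    void Branch(bool branch_on_true_condition, Label* target);  // assumed primitive
    void Jump(Label* target);                                   // assumed primitive

    // true_target / false_target are null when that successor falls through.
    void TestAndBranch(Label* true_target, Label* false_target) {
      if (true_target == nullptr && false_target == nullptr) {
        return;                                    // nothing to emit
      }
      if (true_target == nullptr) {
        Branch(/* opposite condition */ false, false_target);  // case (1)
      } else {
        Branch(/* condition */ true, true_target);             // case (2)
        if (false_target != nullptr) {
          Jump(false_target);                                  // case (3)
        }
      }
    }
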
 
 void LocationsBuilderMIPS::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
-  MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  MipsLabel* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
-      DeoptimizationSlowPathMIPS(deoptimize);
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  MipsLabel* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
@@ -2616,6 +2637,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2646,8 +2668,7 @@
 
   if (is_volatile && load_type == kLoadDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2670,21 +2691,34 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->Out().IsRegisterPair());
         dst = locations->Out().AsRegisterPairLow<Register>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        if (obj == dst) {
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+        } else {
+          __ LoadFromOffset(kLoadWord, dst, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+        }
       } else {
         DCHECK(locations->Out().IsRegister());
         dst = locations->Out().AsRegister<Register>();
+        __ LoadFromOffset(load_type, dst, obj, offset);
       }
-      __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->Out().IsFpuRegister());
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadSFromOffset(dst, obj, offset);
       } else {
-        __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+        __ LoadDFromOffset(dst, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   if (is_volatile) {
@@ -2730,6 +2764,7 @@
   Register obj = locations->InAt(0).AsRegister<Register>();
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -2760,8 +2795,7 @@
 
   if (is_volatile && store_type == kStoreDoubleword) {
     InvokeRuntimeCallingConvention calling_convention;
-    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
-               obj, field_info.GetFieldOffset().Uint32Value());
+    __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
     // Do implicit null check.
     __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2784,21 +2818,28 @@
       if (type == Primitive::kPrimLong) {
         DCHECK(locations->InAt(1).IsRegisterPair());
         src = locations->InAt(1).AsRegisterPairLow<Register>();
+        Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+        __ StoreToOffset(kStoreWord, src, obj, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
       } else {
         DCHECK(locations->InAt(1).IsRegister());
         src = locations->InAt(1).AsRegister<Register>();
+        __ StoreToOffset(store_type, src, obj, offset);
       }
-      __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
     } else {
       DCHECK(locations->InAt(1).IsFpuRegister());
       FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
-        __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreSToOffset(src, obj, offset);
       } else {
-        __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+        __ StoreDToOffset(src, obj, offset);
       }
     }
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+    // Longs are handled earlier.
+    if (type != Primitive::kPrimLong) {
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   // TODO: memory barriers?
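
The 64-bit field paths above deserve a note: on MIPS32 a long is two word accesses, the first of which doubles as the implicit null check, and for loads the order flips when the destination pair aliases the object register. A hedged sketch with an illustrative register-file model (obj == dst_hi is assumed to be excluded by the register allocator):

    #include <cstdint>

    // The first word access is the one that faults on a null object, which is
    // why MaybeRecordImplicitNullCheck sits between the two accesses above.
    void LoadLongField(uint32_t* regs, int obj, int dst_lo, int dst_hi,
                       uint32_t offset) {
      const uint32_t* base = reinterpret_cast<const uint32_t*>(
          static_cast<uintptr_t>(regs[obj]) + offset);
      if (obj == dst_lo) {
        regs[dst_hi] = base[1];  // load high first so the base isn't clobbered
        regs[dst_lo] = base[0];
      } else {
        regs[dst_lo] = base[0];
        regs[dst_hi] = base[1];
      }
    }
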
@@ -3009,7 +3050,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -3021,7 +3062,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
       Register method_reg;
       if (current_method.IsRegister()) {
@@ -3148,6 +3189,7 @@
                             cls->GetDexPc(),
                             nullptr,
                             IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -3159,21 +3201,26 @@
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadWord, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
     __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqz(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqz(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -3456,17 +3503,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  Register current_method_register = calling_convention.GetRegisterAt(1);
-  __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
-  // Move an uint16_t value to a register.
-  __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(
       GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
@@ -3683,7 +3725,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmodf));
-      CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
       break;
     }
     case Primitive::kPrimDouble: {
@@ -3691,7 +3733,7 @@
                               instruction, instruction->GetDexPc(),
                               nullptr,
                               IsDirectEntrypoint(kQuickFmod));
-      CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
       break;
     }
     default:
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 059131d..e3a2cb4 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -226,9 +226,9 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              MipsLabel* true_target,
-                             MipsLabel* false_target,
-                             MipsLabel* always_true_target);
+                             MipsLabel* false_target);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   MipsAssembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 55efd5f..bc5eb31 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,19 +16,19 @@
 
 #include "code_generator_mips64.h"
 
+#include "art_method.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_mips64.h"
-#include "art_method.h"
-#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
 #include "thread.h"
-#include "utils/mips64/assembler_mips64.h"
 #include "utils/assembler.h"
+#include "utils/mips64/assembler_mips64.h"
 #include "utils/stack_checks.h"
 
 namespace art {
@@ -210,7 +210,7 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
@@ -257,7 +257,7 @@
                                  type);
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
@@ -312,13 +312,13 @@
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
-      __ B(GetReturnLabel());
+      __ Bc(GetReturnLabel());
     } else {
-      __ B(mips64_codegen->GetLabelOf(successor_));
+      __ Bc(mips64_codegen->GetLabelOf(successor_));
     }
   }
 
-  Label* GetReturnLabel() {
+  Mips64Label* GetReturnLabel() {
     DCHECK(successor_ == nullptr);
     return &return_label_;
   }
@@ -331,7 +331,7 @@
   HBasicBlock* const successor_;
 
   // If `successor_` is null, the label to branch to after the suspend check.
-  Label return_label_;
+  Mips64Label return_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64);
 };
@@ -366,13 +366,11 @@
                                     instruction_,
                                     dex_pc,
                                     this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial,
-                           uint32_t,
-                           const mirror::Class*,
-                           const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -380,7 +378,7 @@
     }
 
     RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
@@ -404,6 +402,7 @@
     uint32_t dex_pc = deoptimize->GetDexPc();
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -420,7 +419,7 @@
     : CodeGenerator(graph,
                     kNumberOfGpuRegisters,
                     kNumberOfFpuRegisters,
-                    0,  // kNumberOfRegisterPairs
+                    /* number_of_register_pairs */ 0,
                     ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                         arraysize(kCoreCalleeSaves)),
                     ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
@@ -441,6 +440,32 @@
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
 
 void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
+  // Ensure that we fix up branches.
+  __ FinalizeCode();
+
+  // Adjust native pc offsets in stack maps.
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
+    uint32_t new_position = __ GetAdjustedPosition(old_position);
+    DCHECK_GE(new_position, old_position);
+    stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
+  }
+
+  // Adjust pc offsets for the disassembly information.
+  if (disasm_info_ != nullptr) {
+    GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+    frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+    frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+    for (auto& it : *disasm_info_->GetInstructionIntervals()) {
+      it.second.start = __ GetAdjustedPosition(it.second.start);
+      it.second.end = __ GetAdjustedPosition(it.second.end);
+    }
+    for (auto& it : *disasm_info_->GetSlowPathIntervals()) {
+      it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start);
+      it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
+    }
+  }
+
   CodeGenerator::Finalize(allocator);
 }
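
Finalize now runs branch fixup before emitting metadata because expanding a short branch into a longer sequence shifts every later instruction, so previously recorded native PCs must be remapped. A hedged sketch of what a GetAdjustedPosition-style remap does, assuming the assembler logs each expansion as a (position, added_bytes) pair; this is an illustrative model, not the real Mips64Assembler bookkeeping:

    #include <cstdint>
    #include <vector>

    struct Expansion { uint32_t position; uint32_t added_bytes; };

    // Monotone remap: every expansion at or before the recorded position pushes
    // it further down, so new_position >= old_position always holds (matching
    // the DCHECK_GE above).
    uint32_t AdjustPosition(const std::vector<Expansion>& expansions,
                            uint32_t old_position) {
      uint32_t new_position = old_position;
      for (const Expansion& e : expansions) {
        if (e.position <= old_position) {
          new_position += e.added_bytes;
        }
      }
      return new_position;
    }
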
 
@@ -603,6 +628,7 @@
   }
 
   __ Jr(RA);
+  __ Nop();
 
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
@@ -666,9 +692,19 @@
         gpr = destination.AsRegister<GpuRegister>();
       }
       if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
-        __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+        int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+        int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst64(gpr, value);
+        }
       }
       if (dst_type == Primitive::kPrimFloat) {
         __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +770,22 @@
       // Move to stack from constant
       HConstant* src_cst = source.GetConstant();
       StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      GpuRegister gpr = ZERO;
       if (destination.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+        int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+        DCHECK(destination.IsDoubleStackSlot());
+        int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
       }
-      __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+      __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
       DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +801,7 @@
   }
 }
 
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
-                                        Location loc2,
-                                        Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
   DCHECK(!loc1.IsConstant());
   DCHECK(!loc2.IsConstant());
 
@@ -781,12 +825,16 @@
     // Swap 2 FPRs
     FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
     FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
-    // TODO: Can MOV.S/MOV.D be used here to save one instruction?
-    // Need to distinguish float from double, right?
-    __ Dmfc1(TMP, r2);
-    __ Dmfc1(AT, r1);
-    __ Dmtc1(TMP, r1);
-    __ Dmtc1(AT, r2);
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, r1);
+      __ MovS(r1, r2);
+      __ MovS(r2, FTMP);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, r1);
+      __ MovD(r1, r2);
+      __ MovD(r2, FTMP);
+    }
   } else if (is_slot1 != is_slot2) {
     // Swap GPR/FPR and stack slot
     Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +848,6 @@
                           reg_loc.AsFpuRegister<FpuRegister>(),
                           SP,
                           mem_loc.GetStackIndex());
-      // TODO: review this MTC1/DMTC1 move
       if (mem_loc.IsStackSlot()) {
         __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
       } else {
@@ -845,12 +892,22 @@
     } else {
       DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
       // Move to stack from constant
+      GpuRegister gpr = ZERO;
       if (location.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
-        __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+        int32_t value = GetInt32ValueOf(instruction->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
+        __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
       } else {
-        __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
-        __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+        DCHECK(location.IsDoubleStackSlot());
+        int64_t value = instruction->AsLongConstant()->GetValue();
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
+        __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
       }
     }
   } else if (instruction->IsTemporary()) {
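
The repeated pattern in this hunk and the ones above is a single micro-optimization: constant zero never needs to be materialized, because MIPS hardwires register $zero to 0, so it can be stored to the stack (or moved into an FPU register) directly. A minimal sketch of the selection logic (register numbers follow the usual MIPS convention; LoadConst64 is an assumed emit primitive):

    #include <cstdint>

    enum Reg { ZERO = 0, TMP = 24 };  // $zero, $t8

    void LoadConst64(Reg reg, int64_t value);  // assumed emit primitive

    // Pick the source register for storing a constant: reuse $zero for 0,
    // otherwise synthesize the value in the scratch register first.
    Reg SourceRegForConstant(int64_t value) {
      if (value == 0) {
        return ZERO;
      }
      LoadConst64(TMP, value);
      return TMP;
    }
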
@@ -908,7 +965,7 @@
 }
 
 void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
-  Label done;
+  Mips64Label done;
   GpuRegister card = AT;
   GpuRegister temp = TMP;
   __ Beqzc(value, &done);
@@ -1017,6 +1074,7 @@
   // TODO: anything related to T9/GP/GOT/PIC/.so's?
   __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
   __ Jalr(T9);
+  __ Nop();
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -1048,7 +1106,7 @@
     __ Bind(slow_path->GetReturnLabel());
   } else {
     __ Beqzc(TMP, codegen_->GetLabelOf(successor));
-    __ B(slow_path->GetEntryLabel());
+    __ Bc(slow_path->GetEntryLabel());
     // slow_path will return to GetLabelOf(successor).
   }
 }
@@ -1198,7 +1256,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -1552,6 +1610,7 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
       }
       break;
     }
@@ -1638,12 +1697,7 @@
   // length is limited by the maximum positive signed 32-bit integer.
   // Unsigned comparison of length and index checks for index < 0
   // and for length <= index simultaneously.
-  // Mips R6 requires lhs != rhs for compact branches.
-  if (index == length) {
-    __ B(slow_path->GetEntryLabel());
-  } else {
-    __ Bgeuc(index, length, slow_path->GetEntryLabel());
-  }
+  __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
@@ -1707,7 +1761,7 @@
   switch (in_type) {
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1736,8 +1790,18 @@
     case Primitive::kPrimLong: {
       GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      // TODO: more efficient (direct) comparison with a constant
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      GpuRegister rhs = ZERO;
+      if (use_imm) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (value != 0) {
+          rhs = AT;
+          __ LoadConst64(rhs, value);
+        }
+      } else {
+        rhs = rhs_location.AsRegister<GpuRegister>();
+      }
       __ Slt(TMP, lhs, rhs);
       __ Slt(dst, rhs, lhs);
       __ Subu(dst, dst, TMP);
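
The branch-free long compare above computes sign(lhs - rhs) from two set-less-than results; the new constant path merely materializes the RHS into AT first (or uses ZERO when it is 0). Worked out in plain C++:

    #include <cstdint>

    int32_t CompareLong(int64_t lhs, int64_t rhs) {
      int32_t tmp = lhs < rhs ? 1 : 0;  // Slt TMP, lhs, rhs
      int32_t dst = rhs < lhs ? 1 : 0;  // Slt dst, rhs, lhs
      return dst - tmp;                 // Subu: -1, 0, or +1
    }
    // CompareLong(3, 7) == -1; CompareLong(7, 7) == 0; CompareLong(9, 7) == 1.
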
@@ -1755,6 +1819,19 @@
                                                      : QUICK_ENTRY_POINT(pCmplDouble);
       }
       codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (in_type == Primitive::kPrimFloat) {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
+        }
+      } else {
+        if (instruction->IsGtBias()) {
+          CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
+        } else {
+          CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
+        }
+      }
       break;
     }
 
@@ -1902,6 +1979,252 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      if (type == Primitive::kPrimInt) {
+        __ Subu(out, ZERO, dividend);
+      } else {
+        DCHECK_EQ(type, Primitive::kPrimLong);
+        __ Dsubu(out, ZERO, dividend);
+      }
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+
+  if (instruction->IsDiv()) {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Srl(TMP, dividend, 31);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+      }
+      __ Addu(out, dividend, TMP);
+      __ Sra(out, out, ctz_imm);
+      if (imm < 0) {
+        __ Subu(out, ZERO, out);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Dsrl32(TMP, dividend, 31);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+      }
+      __ Daddu(out, dividend, TMP);
+      if (ctz_imm < 32) {
+        __ Dsra(out, out, ctz_imm);
+      } else {
+        __ Dsra32(out, out, ctz_imm - 32);
+      }
+      if (imm < 0) {
+        __ Dsubu(out, ZERO, out);
+      }
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Sra(TMP, dividend, 31);
+        __ Subu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Addu(out, out, TMP);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+        __ Addu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          __ Sll(out, out, 32 - ctz_imm);
+          __ Srl(out, out, 32 - ctz_imm);
+        }
+        __ Subu(out, out, TMP);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Dsra32(TMP, dividend, 31);
+        __ Dsubu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Daddu(out, out, TMP);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+        __ Daddu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          if (ctz_imm > 32) {
+            __ Dsll(out, out, 64 - ctz_imm);
+            __ Dsrl(out, out, 64 - ctz_imm);
+          } else {
+            __ Dsll32(out, out, 32 - ctz_imm);
+            __ Dsrl32(out, out, 32 - ctz_imm);
+          }
+        }
+        __ Dsubu(out, out, TMP);
+      }
+    }
+  }
+}
+
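The biasing above turns the floor semantics of an arithmetic shift into the truncating division Java requires: negative dividends are first bumped by abs_imm - 1, which the sign-extend/logical-shift pair materializes in TMP. A standalone sketch of the kPrimInt path for a positive power-of-two divisor (DivByPowerOfTwo is a hypothetical name; it assumes arithmetic >> on negative ints, as the emitted Sra does):

#include <cassert>
#include <cstdint>

// Hypothetical model of the kPrimInt division path for divisor 2^ctz_imm.
// Sra(TMP, dividend, 31) then Srl(TMP, TMP, 32 - ctz_imm) leaves
// 2^ctz_imm - 1 in TMP for negative dividends and 0 otherwise.
int32_t DivByPowerOfTwo(int32_t dividend, int ctz_imm) {
  uint32_t bias = static_cast<uint32_t>(dividend >> 31) >> (32 - ctz_imm);
  // Addu(out, dividend, TMP); Sra(out, out, ctz_imm): with the bias added,
  // the arithmetic shift rounds toward zero instead of toward -infinity.
  return (dividend + static_cast<int32_t>(bias)) >> ctz_imm;
}

int main() {
  for (int32_t n = -1024; n <= 1024; ++n) {
    assert(DivByPowerOfTwo(n, 3) == n / 8);
  }
}
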
+void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm,
+                                  (type == Primitive::kPrimLong),
+                                  &magic,
+                                  &shift);
+
+  if (type == Primitive::kPrimInt) {
+    __ LoadConst32(TMP, magic);
+    __ MuhR6(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Addu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Subu(TMP, TMP, dividend);
+    }
+
+    if (shift != 0) {
+      __ Sra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Sra(out, TMP, 31);
+      __ Subu(out, TMP, out);
+    } else {
+      __ Sra(AT, TMP, 31);
+      __ Subu(AT, TMP, AT);
+      __ LoadConst32(TMP, imm);
+      __ MulR6(TMP, AT, TMP);
+      __ Subu(out, dividend, TMP);
+    }
+  } else {
+    __ LoadConst64(TMP, magic);
+    __ Dmuh(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Daddu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Dsubu(TMP, TMP, dividend);
+    }
+
+    if (shift >= 32) {
+      __ Dsra32(TMP, TMP, shift - 32);
+    } else if (shift > 0) {
+      __ Dsra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Dsra32(out, TMP, 31);
+      __ Dsubu(out, TMP, out);
+    } else {
+      __ Dsra32(AT, TMP, 31);
+      __ Dsubu(AT, TMP, AT);
+      __ LoadConst64(TMP, imm);
+      __ Dmul(TMP, AT, TMP);
+      __ Dsubu(out, dividend, TMP);
+    }
+  }
+}
+
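For divisors that are neither ±1 nor a power of two, the quotient comes from the high word of dividend * magic, a corrective add/sub of the dividend, an arithmetic shift, and a final sign-bit fix-up to round toward zero. A standalone illustration for divisor 7, using the standard Hacker's Delight magic/shift pair for this case, which is what CalculateMagicAndShiftForDivRem computes (MagicDiv7 is a hypothetical name; constants assumed, verify against the helper):

#include <cassert>
#include <cstdint>

int32_t MagicDiv7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493);
  const int shift = 2;
  // MuhR6: keep the high 32 bits of the 64-bit product.
  int32_t q = static_cast<int32_t>((static_cast<int64_t>(n) * magic) >> 32);
  q += n;          // imm > 0 && magic < 0: add the dividend back.
  q >>= shift;     // Sra(TMP, TMP, shift).
  q -= (q >> 31);  // Sra/Subu pair: add 1 when the raw quotient is negative.
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n) {
    assert(MagicDiv7(n) == n / 7);
  }
}
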
+void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(std::abs(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+    GpuRegister divisor = second.AsRegister<GpuRegister>();
+    if (instruction->IsDiv()) {
+      if (type == Primitive::kPrimInt)
+        __ DivR6(out, dividend, divisor);
+      else
+        __ Ddiv(out, dividend, divisor);
+    } else {
+      if (type == Primitive::kPrimInt)
+        __ ModR6(out, dividend, divisor);
+      else
+        __ Dmod(out, dividend, divisor);
+    }
+  }
+}
+
 void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1909,7 +2232,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1931,16 +2254,9 @@
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ DivR6(dst, lhs, rhs);
-      else
-        __ Ddiv(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1984,7 +2300,7 @@
   if (value.IsConstant()) {
     int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
     if (divisor == 0) {
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
     } else {
       // A division by a non-null constant is valid. We don't need to perform
       // any check, so simply fall through.
@@ -2036,7 +2352,7 @@
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
-    __ B(codegen_->GetLabelOf(successor));
+    __ Bc(codegen_->GetLabelOf(successor));
   }
 }
 
@@ -2060,30 +2376,51 @@
 }
 
 void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
-                                                           Label* true_target,
-                                                           Label* false_target,
-                                                           Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  HCondition* condition = cond->AsCondition();
+                                                           size_t condition_input_index,
+                                                           Mips64Label* true_target,
+                                                           Mips64Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
 
-  if (cond->IsIntConstant()) {
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ B(always_true_target);
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against 1.
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ Bc(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ Bc(false_target);
+      }
     }
-  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
     // The condition instruction has been materialized, compare the output to 0.
-    Location cond_val = instruction->GetLocations()->InAt(0);
+    Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
     DCHECK(cond_val.IsRegister());
-    __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+    if (true_target == nullptr) {
+      __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target);
+    } else {
+      __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+    }
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
     GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
     Location rhs_location = condition->GetLocations()->InAt(1);
     GpuRegister rhs_reg = ZERO;
@@ -2095,37 +2432,46 @@
       rhs_reg = rhs_location.AsRegister<GpuRegister>();
     }
 
-    IfCondition if_cond = condition->GetCondition();
+    IfCondition if_cond;
+    Mips64Label* non_fallthrough_target;
+    if (true_target == nullptr) {
+      if_cond = condition->GetOppositeCondition();
+      non_fallthrough_target = false_target;
+    } else {
+      if_cond = condition->GetCondition();
+      non_fallthrough_target = true_target;
+    }
+
     if (use_imm && rhs_imm == 0) {
       switch (if_cond) {
         case kCondEQ:
-          __ Beqzc(lhs, true_target);
+          __ Beqzc(lhs, non_fallthrough_target);
           break;
         case kCondNE:
-          __ Bnezc(lhs, true_target);
+          __ Bnezc(lhs, non_fallthrough_target);
           break;
         case kCondLT:
-          __ Bltzc(lhs, true_target);
+          __ Bltzc(lhs, non_fallthrough_target);
           break;
         case kCondGE:
-          __ Bgezc(lhs, true_target);
+          __ Bgezc(lhs, non_fallthrough_target);
           break;
         case kCondLE:
-          __ Blezc(lhs, true_target);
+          __ Blezc(lhs, non_fallthrough_target);
           break;
         case kCondGT:
-          __ Bgtzc(lhs, true_target);
+          __ Bgtzc(lhs, non_fallthrough_target);
           break;
         case kCondB:
           break;  // always false
         case kCondBE:
-          __ Beqzc(lhs, true_target);  // <= 0 if zero
+          __ Beqzc(lhs, non_fallthrough_target);  // <= 0 if zero
           break;
         case kCondA:
-          __ Bnezc(lhs, true_target);  // > 0 if non-zero
+          __ Bnezc(lhs, non_fallthrough_target);  // > 0 if non-zero
           break;
         case kCondAE:
-          __ B(true_target);  // always true
+          __ Bc(non_fallthrough_target);  // always true
           break;
       }
     } else {
@@ -2133,96 +2479,69 @@
         rhs_reg = TMP;
         __ LoadConst32(rhs_reg, rhs_imm);
       }
-      // It looks like we can get here with lhs == rhs. Should that be possible at all?
-      // Mips R6 requires lhs != rhs for compact branches.
-      if (lhs == rhs_reg) {
-        DCHECK(!use_imm);
-        switch (if_cond) {
-          case kCondEQ:
-          case kCondGE:
-          case kCondLE:
-          case kCondBE:
-          case kCondAE:
-            // if lhs == rhs for a positive condition, then it is a branch
-            __ B(true_target);
-            break;
-          case kCondNE:
-          case kCondLT:
-          case kCondGT:
-          case kCondB:
-          case kCondA:
-            // if lhs == rhs for a negative condition, then it is a NOP
-            break;
-        }
-      } else {
-        switch (if_cond) {
-          case kCondEQ:
-            __ Beqc(lhs, rhs_reg, true_target);
-            break;
-          case kCondNE:
-            __ Bnec(lhs, rhs_reg, true_target);
-            break;
-          case kCondLT:
-            __ Bltc(lhs, rhs_reg, true_target);
-            break;
-          case kCondGE:
-            __ Bgec(lhs, rhs_reg, true_target);
-            break;
-          case kCondLE:
-            __ Bgec(rhs_reg, lhs, true_target);
-            break;
-          case kCondGT:
-            __ Bltc(rhs_reg, lhs, true_target);
-            break;
-          case kCondB:
-            __ Bltuc(lhs, rhs_reg, true_target);
-            break;
-          case kCondAE:
-            __ Bgeuc(lhs, rhs_reg, true_target);
-            break;
-          case kCondBE:
-            __ Bgeuc(rhs_reg, lhs, true_target);
-            break;
-          case kCondA:
-            __ Bltuc(rhs_reg, lhs, true_target);
-            break;
-        }
+      switch (if_cond) {
+        case kCondEQ:
+          __ Beqc(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondNE:
+          __ Bnec(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondLT:
+          __ Bltc(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondGE:
+          __ Bgec(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondLE:
+          __ Bgec(rhs_reg, lhs, non_fallthrough_target);
+          break;
+        case kCondGT:
+          __ Bltc(rhs_reg, lhs, non_fallthrough_target);
+          break;
+        case kCondB:
+          __ Bltuc(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondAE:
+          __ Bgeuc(lhs, rhs_reg, non_fallthrough_target);
+          break;
+        case kCondBE:
+          __ Bgeuc(rhs_reg, lhs, non_fallthrough_target);
+          break;
+        case kCondA:
+          __ Bltuc(rhs_reg, lhs, non_fallthrough_target);
+          break;
       }
     }
   }
-  if (false_target != nullptr) {
-    __ B(false_target);
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (as in case 2) and we still need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
+    __ Bc(false_target);
   }
 }
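For concreteness, here is how the three patterns lower a simple `a < b` guard on MIPS64 R6 compact branches, following the switch cases above (a hedged mapping, not verbatim compiler output):

// Hypothetical lowerings of HIf(a < b), pattern numbers as in the comment above:
//   true block is next     -> (1): Bgec a, b, false_target   // opposite condition
//   false block is next    -> (2): Bltc a, b, true_target
//   neither block is next  -> (3): Bltc a, b, true_target; Bc false_target
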
 
 void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
@@ -2231,8 +2550,10 @@
   SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathMIPS64(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
@@ -2387,7 +2708,7 @@
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
   GpuRegister out = locations->Out().AsRegister<GpuRegister>();
 
-  Label done;
+  Mips64Label done;
 
   // Return 0 if `obj` is null.
   // TODO: Avoid this check if we know `obj` is not null.
@@ -2482,6 +2803,7 @@
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();  // Fill the branch delay slot of the JALR above.
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -2512,10 +2834,12 @@
   // allocation of a register for the current method pointer like on x86 baseline.
   // TODO: remove this once all the issues with register saving/restoring are
   // sorted out.
-  LocationSummary* locations = invoke->GetLocations();
-  Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
-  if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-    locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+  if (invoke->HasCurrentMethodInput()) {
+    LocationSummary* locations = invoke->GetLocations();
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
+    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
+    }
   }
 }
 
@@ -2572,7 +2896,7 @@
                         invoke->GetStringInitOffset());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
@@ -2584,7 +2908,7 @@
       LOG(FATAL) << "Unsupported";
       UNREACHABLE();
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       GpuRegister reg = temp.AsRegister<GpuRegister>();
       GpuRegister method_reg;
       if (current_method.IsRegister()) {
@@ -2614,13 +2938,14 @@
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Jalr(&frame_entry_label_, T9);
+      __ Jialc(&frame_entry_label_, T9);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // LR = invoke->GetDirectCodePtr();
       __ LoadConst64(T9, invoke->GetDirectCodePtr());
       // LR()
       __ Jalr(T9);
+      __ Nop();  // Fill the branch delay slot.
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
@@ -2637,6 +2962,7 @@
                             kMips64WordSize).Int32Value());
       // T9()
       __ Jalr(T9);
+      __ Nop();  // Fill the branch delay slot.
       break;
   }
   DCHECK(!IsLeafMethod());
@@ -2659,29 +2985,38 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
-    return;
-  }
+void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
 
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
-  GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+  __ Nop();  // Fill the branch delay slot.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -2691,7 +3026,7 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(A0));
+      calling_convention.GetReturnLocation(cls->GetType()));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
@@ -2702,6 +3037,7 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
@@ -2713,22 +3049,26 @@
     __ LoadFromOffset(kLoadUnsignedWord, out, current_method,
                       ArtMethod::DeclaringClassOffset().Int32Value());
   } else {
-    DCHECK(cls->CanCallRuntime());
     __ LoadFromOffset(kLoadDoubleword, out, current_method,
                       ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
     __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
     // TODO: We will need a read barrier here.
-    SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
-        cls,
-        cls,
-        cls->GetDexPc(),
-        cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ Beqzc(out, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
-    } else {
-      __ Bind(slow_path->GetExitLabel());
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+          cls,
+          cls,
+          cls->GetDexPc(),
+          cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ Beqzc(out, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -2818,7 +3158,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
-  CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
@@ -2952,15 +3296,12 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  // Move an uint16_t value to a register.
-  __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
@@ -3108,7 +3449,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3128,26 +3469,23 @@
 
 void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ ModR6(dst, lhs, rhs);
-      else
-        __ Dmod(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
                                                              : QUICK_ENTRY_POINT(pFmod);
       codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+      if (type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      } else {
+        CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      }
       break;
     }
     default:
@@ -3457,6 +3795,11 @@
                               conversion,
                               conversion->GetDexPc(),
                               nullptr);
+      if (result_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+      } else {
+        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
@@ -3472,6 +3815,19 @@
                             conversion,
                             conversion->GetDexPc(),
                             nullptr);
+    if (result_type != Primitive::kPrimLong) {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+      }
+    } else {
+      if (input_type == Primitive::kPrimFloat) {
+        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+      } else {
+        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+      }
+    }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
     FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -3623,7 +3979,7 @@
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
   for (int32_t i = 0; i < num_entries; i++) {
     int32_t case_value = lower_bound + i;
-    Label* succ = codegen_->GetLabelOf(successors[i]);
+    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
     if (case_value == 0) {
       __ Beqzc(value_reg, succ);
     } else {
@@ -3634,7 +3990,7 @@
 
   // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-    __ B(codegen_->GetLabelOf(default_block));
+    __ Bc(codegen_->GetLabelOf(default_block));
   }
 }
 
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9bbd027..85e3a4a 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -119,9 +119,12 @@
   Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return Location::RegisterLocation(V0);
   }
-  Location GetSetValueLocation(
-      Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
-    return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1);
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? Location::RegisterLocation(A2)
+        : (is_instance
+            ? Location::RegisterLocation(A2)
+            : Location::RegisterLocation(A1));
   }
   Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
     return Location::FpuRegisterLocation(F0);
@@ -155,12 +158,12 @@
  public:
   SlowPathCodeMIPS64() : entry_label_(), exit_label_() {}
 
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
+  Mips64Label* GetEntryLabel() { return &entry_label_; }
+  Mips64Label* GetExitLabel() { return &exit_label_; }
 
  private:
-  Label entry_label_;
-  Label exit_label_;
+  Mips64Label entry_label_;
+  Mips64Label exit_label_;
 
   DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64);
 };
@@ -227,9 +230,13 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
-                             Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
+                             size_t condition_input_index,
+                             Mips64Label* true_target,
+                             Mips64Label* false_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
@@ -258,7 +265,7 @@
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
 
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
-    return GetLabelOf(block)->Position();
+    return assembler_.GetLabelLocation(GetLabelOf(block));
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
@@ -291,12 +298,12 @@
     return isa_features_;
   }
 
-  Label* GetLabelOf(HBasicBlock* block) const {
-    return CommonGetLabelOf<Label>(block_labels_, block);
+  Mips64Label* GetLabelOf(HBasicBlock* block) const {
+    return CommonGetLabelOf<Mips64Label>(block_labels_, block);
   }
 
   void Initialize() OVERRIDE {
-    block_labels_ = CommonInitializeLabels<Label>();
+    block_labels_ = CommonInitializeLabels<Mips64Label>();
   }
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -333,10 +340,7 @@
       MethodReference target_method) OVERRIDE;
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
@@ -345,8 +349,8 @@
 
  private:
   // Labels for each block that will be compiled.
-  Label* block_labels_;  // Indexed by block id.
-  Label frame_entry_label_;
+  Mips64Label* block_labels_;  // Indexed by block id.
+  Mips64Label frame_entry_label_;
   LocationsBuilderMIPS64 location_builder_;
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
   ParallelMoveResolverMIPS64 move_resolver_;
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 921c1d8..644a3fb 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_utils.h"
+#include "nodes.h"
 
 #include "base/logging.h"
 
@@ -94,4 +95,8 @@
   *shift = is_long ? p - 64 : p - 32;
 }
 
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) {
+  return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 59b495c..7efed8c 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,10 +21,17 @@
 
 namespace art {
 
+class HInstruction;
+
 // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out
 // arguments `magic` and `shift`.
 void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
 
+// Returns true if `cond_input` is expected to have a location. Assumes that
+// `cond_input` is a conditional input of the currently emitted instruction and
+// that it has been previously visited by the InstructionCodeGenerator.
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0df7e3b..2fb87d3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -19,7 +19,6 @@
 #include "art_method.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
-#include "constant_area_fixups_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -27,6 +26,7 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -35,6 +35,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace x86 {
 
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -64,6 +67,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -90,6 +94,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -149,6 +154,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -174,6 +180,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
@@ -219,6 +226,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
     x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
     RestoreLiveRegisters(codegen, locations);
 
@@ -254,6 +262,11 @@
     x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
                                           : QUICK_ENTRY_POINT(pInitializeType),
                                at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     // Move the class to the desired location.
     Location out = locations->Out();
@@ -300,15 +313,6 @@
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = locations->InAt(0).AsRegister<Register>();
-      Register temp = locations->GetTemp(0).AsRegister<Register>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ movl(temp, Address(obj, class_offset));
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -329,12 +333,15 @@
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
       x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -371,6 +378,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -413,6 +421,7 @@
                                instruction_,
                                instruction_->GetDexPc(),
                                this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -425,6 +434,221 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
+                                         Location out,
+                                         Location ref,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ movl(out, Address(out, offset));
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like the other "inputs" of this slow path), we
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::x86::X86Assembler::shll and
+          // art::x86::X86Assembler::AddImmediate below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ movl(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ shll(index_reg, Immediate(TIMES_4));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddImmediate(index_reg, Immediate(offset_));
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair, the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
+    }
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on x86
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
+};
+
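The HArrayGet branch above rewrites the index into a raw byte offset so that pReadBarrierSlow only ever sees a (ref, obj, offset) triple. A minimal model of the rewrite the shll/AddImmediate pair performs, assuming 4-byte heap references (FlattenArrayIndex, kTimes4Shift, and the sample data offset are hypothetical):

#include <cassert>
#include <cstdint>

// Assumption: x86 ScaleFactor TIMES_4 encodes a shift amount of 2.
constexpr int kTimes4Shift = 2;

// Hypothetical model: scale the element index by
// sizeof(mirror::HeapReference<mirror::Object>) == 4 and fold in the
// array's data offset, yielding a plain byte offset for the runtime call.
uint32_t FlattenArrayIndex(uint32_t index, uint32_t data_offset) {
  index <<= kTimes4Shift;      // shll(index_reg, Immediate(TIMES_4))
  return index + data_offset;  // AddImmediate(index_reg, Immediate(offset_))
}

int main() {
  // Illustrative numbers only: element 5 with a 12-byte data offset -> +32.
  assert(FlattenArrayIndex(5, 12) == 32);
}
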
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
+};
+
 #undef __
 #define __ down_cast<X86Assembler*>(GetAssembler())->
 
@@ -513,9 +737,9 @@
 }
 
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
-                   const X86InstructionSetFeatures& isa_features,
-                   const CompilerOptions& compiler_options,
-                   OptimizingCompilerStats* stats)
+                                   const X86InstructionSetFeatures& isa_features,
+                                   const CompilerOptions& compiler_options,
+                                   OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
                     kNumberOfCpuRegisters,
                     kNumberOfXmmRegisters,
@@ -533,6 +757,7 @@
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
@@ -581,7 +806,7 @@
       LOG(FATAL) << "Unreachable type " << type;
   }
 
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
@@ -782,7 +1007,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorX86::Move32(Location destination, Location source) {
@@ -1157,26 +1382,19 @@
   __ j(final_condition, true_label);
 }
 
-void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                               HCondition* condition,
-                                                               Label* true_target,
-                                                               Label* false_target,
-                                                               Label* always_true_target) {
+void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
+                                                               Label* true_target_in,
+                                                               Label* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+  Label fallthrough_target;
+  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong:
@@ -1194,119 +1412,141 @@
       LOG(FATAL) << "Unexpected compare type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ jmp(false_target);
   }
+
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
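Binding `fallthrough_target` right after the emitted sequence makes every branch to it land on the next instruction, and the trailing jmp is skipped whenever the false edge is the fallthrough. Schematically, for "if (a > b) goto T; else fall through" on a long/FP compare (a hedged shape, not verbatim output):

// Hypothetical emitted shape when false_target_in == nullptr:
//
//     <long/FP compare sequence>
//     j<cond>   T                    // taken edge to the true successor
//     j<cond'>  fallthrough_target   // "false" edges emitted by the compare
//   fallthrough_target:              // bound here; no trailing jmp needed
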
 
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+  // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
+  // are set only strictly before `branch`. We can't use the eflags on long/FP
+  // conditions if they are materialized due to the complex branching.
+  return cond->IsCondition() &&
+         cond->GetNext() == branch &&
+         cond->InputAt(0)->GetType() != Primitive::kPrimLong &&
+         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
+}
+
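To see why the `cond->GetNext() == branch` adjacency test matters, consider a hedged example (hypothetical instruction streams, not verbatim compiler output):

// Materialized `a < b` feeding an HIf on x86:
//
//   cmpl %ebx, %eax        ; HCondition sets EFLAGS
//   jl   true_target       ; cond->GetNext() == branch: flags still valid
//
//   cmpl %ebx, %eax
//   xorl %ecx, %ecx        ; materializing a zero clobbers EFLAGS
//   jl   true_target       ; unsafe: the materialized boolean must be re-tested
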
 void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
+                                                        size_t condition_input_index,
                                                         Label* true_target,
-                                                        Label* false_target,
-                                                        Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
+                                                        Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
     // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ jmp(always_true_target);
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ jmp(true_target);
       }
-      return;
     } else {
-      DCHECK_EQ(cond_value, 0);
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ jmp(false_target);
+      }
     }
-  } else {
-    bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
-    // Moves do not affect the eflags register, so if the condition is
-    // evaluated just before the if, we don't need to evaluate it
-    // again.  We can't use the eflags on long/FP conditions if they are
-    // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
-        && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type));
-    if (is_materialized) {
-      if (!eflags_set) {
-        // Materialized condition, compare against 0.
-        Location lhs = instruction->GetLocations()->InAt(0);
-        if (lhs.IsRegister()) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
-        }
-        __ j(kNotEqual, true_target);
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    if (AreEflagsSetFrom(cond, instruction)) {
+      if (true_target == nullptr) {
+        __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
       } else {
         __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(),
-                                     cond->AsCondition(),
-                                     true_target,
-                                     false_target,
-                                     always_true_target);
-        return;
-      }
-
-      Location lhs = cond->GetLocations()->InAt(0);
-      Location rhs = cond->GetLocations()->InAt(1);
-      // LHS is guaranteed to be in a register (see
-      // LocationsBuilderX86::VisitCondition).
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
-        } else {
-          __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
-        }
+      // Materialized condition, compare against 0.
+      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+      if (lhs.IsRegister()) {
+        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
       } else {
-        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+        __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
       }
-      __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
+      if (true_target == nullptr) {
+        __ j(kEqual, false_target);
+      } else {
+        __ j(kNotEqual, true_target);
+      }
+    }
+  } else {
+    // Condition has not been materialized, use its inputs as the comparison and
+    // its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+    } else if (rhs.IsConstant()) {
+      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+      if (constant == 0) {
+        __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+      } else {
+        __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
+      }
+    } else {
+      __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+    }
+    if (true_target == nullptr) {
+      __ j(X86Condition(condition->GetOppositeCondition()), false_target);
+    } else {
+      __ j(X86Condition(condition->GetCondition()), true_target);
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // has already been emitted above (as in case 2), and we still need a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ jmp(false_target);
   }
 }
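
The three emission patterns enumerated in the comment above reduce to a small
decision table. Below is a hedged sketch of just that control logic, with
invented Cond/Label/emit helpers; the real code additionally handles constant
and unmaterialized conditions.

    #include <cstdio>

    enum class Cond { kEqual, kNotEqual };
    Cond Opposite(Cond c) { return c == Cond::kEqual ? Cond::kNotEqual : Cond::kEqual; }

    struct Label {};

    void EmitCondBranch(Cond c, Label* target) {
      std::printf("j%s %p\n", c == Cond::kEqual ? "e" : "ne", (void*)target);
    }
    void EmitJmp(Label* target) { std::printf("jmp %p\n", (void*)target); }

    // (1) only false_target: branch to it on the opposite condition.
    // (2) only true_target: branch to it on the condition.
    // (3) both: branch to true_target, then unconditionally jump to false_target.
    void TestAndBranch(Cond condition, Label* true_target, Label* false_target) {
      if (true_target == nullptr && false_target == nullptr) {
        return;  // both successors fall through; nothing to emit
      }
      if (true_target == nullptr) {
        EmitCondBranch(Opposite(condition), false_target);  // case (1)
      } else {
        EmitCondBranch(condition, true_target);             // cases (2) and (3)
      }
      if (true_target != nullptr && false_target != nullptr) {
        EmitJmp(false_target);                              // case (3) tail
      }
    }

    int main() {
      Label if_true, if_false;
      TestAndBranch(Cond::kEqual, &if_true, &if_false);  // case (3): je; jmp
    }
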
 
 void LocationsBuilderX86::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
@@ -1315,8 +1555,10 @@
   SlowPathCode* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathX86(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderX86::VisitLocal(HLocal* local) {
@@ -1677,19 +1919,27 @@
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
 
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
+
   if (codegen_->IsBaseline()) {
     // Baseline does not have enough registers if the current method also
     // needs a register. We therefore do not require a register for it, and let
     // the code generation of the invoke handle it.
     LocationSummary* locations = invoke->GetLocations();
-    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    Location location = locations->InAt(invoke->GetSpecialInputIndex());
     if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+      locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
     }
   }
 }
@@ -1719,6 +1969,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -1738,6 +1993,9 @@
 }
 
 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // This call to HandleInvoke allocates a temporary (core) register
+  // which is also used to stage the hidden argument before it is
+  // transferred into the FP register XMM7.
   HandleInvoke(invoke);
   // Add the hidden argument.
   invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
@@ -1745,31 +2003,42 @@
 
 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+  XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
-  // Set the hidden argument.
+  // Set the hidden argument. It is safe to do this here, as XMM7
+  // won't be modified thereafter, before the `call` instruction.
+  DCHECK_EQ(XMM7, hidden_reg);
   __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
-  __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp);
+  __ movd(hidden_reg, temp);
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ movl(temp, Address(temp, class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a
+  // future collector may not).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86WordSize).Int32Value()));
+  __ call(Address(temp,
+                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
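
Conceptually, the sequence above performs an IMT lookup: load the receiver's
class, index its embedded interface method table by the IMT index, and call
the resulting entry point, with the dex method index travelling in XMM7 so a
conflict trampoline can disambiguate collisions. A simplified model under
invented types follows; the kImtSize value of 64 is only an assumption for
the sketch.

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kImtSize = 64;  // assumed; stands in for mirror::Class::kImtSize

    struct ArtMethodModel { const void* entry_point; };

    struct ClassModel {
      ArtMethodModel* imt[kImtSize];  // embedded interface method table
    };

    struct ObjectModel { ClassModel* klass; };

    // Models the emitted code: temp = receiver->klass_;
    // temp = temp->GetImtEntryAt(method_offset); call temp->GetEntryPoint().
    const void* ResolveInterfaceCall(ObjectModel* receiver, uint32_t imt_index) {
      ClassModel* klass = receiver->klass;
      ArtMethodModel* method = klass->imt[imt_index % kImtSize];
      return method->entry_point;
    }
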
@@ -2207,6 +2476,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
           break;
 
         case Primitive::kPrimDouble:
@@ -2215,6 +2485,7 @@
                                   conversion,
                                   conversion->GetDexPc(),
                                   nullptr);
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
           break;
 
         default:
@@ -2995,7 +3266,7 @@
       DCHECK_EQ(EAX, first.AsRegister<Register>());
       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
 
-      if (instruction->InputAt(1)->IsIntConstant()) {
+      if (second.IsConstant()) {
         int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
 
         if (imm == 0) {
@@ -3045,11 +3316,13 @@
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       } else {
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
                                 instruction,
                                 instruction->GetDexPc(),
                                 nullptr);
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
       }
       break;
     }
@@ -3516,19 +3789,18 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   locations->SetOut(Location::RegisterLocation(EAX));
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
 }
 
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -3545,13 +3817,13 @@
 void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
-
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
   DCHECK(!codegen_->IsLeafMethod());
 }
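
The CheckEntrypointTypes<...> lines added throughout this patch assert at
compile time that a call site's assumed return and argument types match the
entrypoint's registered signature. The mechanism below is only a guess at how
such a check can be expressed; the trait table and names are invented, not
ART's actual definitions.

    #include <cstdint>
    #include <tuple>
    #include <type_traits>

    // Hypothetical registry mapping an entrypoint enum value to its C signature.
    enum QuickEntrypointEnum { kQuickAllocArrayWithAccessCheck };

    template <QuickEntrypointEnum E> struct EntrypointTraits;
    template <> struct EntrypointTraits<kQuickAllocArrayWithAccessCheck> {
      using ReturnType = void*;
      using ArgTypes = std::tuple<uint32_t, int32_t, void*>;  // ArtMethod* modeled as void*
    };

    template <QuickEntrypointEnum E, typename R, typename... Args>
    void CheckEntrypointTypes() {
      static_assert(std::is_same<R, typename EntrypointTraits<E>::ReturnType>::value,
                    "Unexpected entrypoint return type");
      static_assert(std::is_same<std::tuple<Args...>,
                                 typename EntrypointTraits<E>::ArgTypes>::value,
                    "Unexpected entrypoint argument types");
    }

    int main() {
      // Mirrors the call site above; fails to compile if the types drift.
      CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, void*>();
    }
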
 
@@ -3760,16 +4032,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   switch (desired_dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
@@ -3786,6 +4048,32 @@
   }
 }
 
+Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ movl(temp, Address(ESP, location.GetStackIndex()));
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ movl(temp, Address(ESP, stack_offset));
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -3794,7 +4082,7 @@
       __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
@@ -3804,13 +4092,18 @@
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+      // Add the patch entry and bind its label at the end of the instruction.
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, offset);
+      __ Bind(&pc_relative_dex_cache_patches_.back().label);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       Register reg = temp.AsRegister<Register>();
       if (current_method.IsRegister()) {
@@ -3821,7 +4114,7 @@
         method_reg = reg;
         __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
       }
-      // temp = temp->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movl(reg, Address(method_reg,
                            ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
       // temp = temp[index_in_cache]
@@ -3862,13 +4155,24 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is in the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (a
+  // future collector may not).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
@@ -3879,23 +4183,33 @@
 
 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  linker_patches->reserve(method_patches_.size() + relative_call_patches_.size());
+  size_t size =
+      method_patches_.size() +
+      relative_call_patches_.size() +
+      pc_relative_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  // Each label points to the end of the patched insn ("movl" or "call"), but the
+  // literal offset needs to point to the embedded constant, which occupies the
+  // instruction's last 4 bytes.
+  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
   for (const MethodPatchInfo<Label>& info : method_patches_) {
-    // The label points to the end of the "movl" insn but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                        info.target_method.dex_file,
                                                        info.target_method.dex_method_index));
   }
   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
-    // The label points to the end of the "call" insn but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
+                                                              &info.target_dex_file,
+                                                              GetMethodAddressOffset(),
+                                                              info.element_offset));
+  }
 }
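
The hoisted `- 4` adjustment reflects that each patch label is bound after the
whole movl/call instruction, while the linker wants the offset of the imm32
operand in its final four bytes. A tiny sketch of that bookkeeping, with
invented field names:

    #include <cstdint>

    struct PatchRecord {
      uint32_t label_position;  // bound at the end of the movl/call instruction
    };

    // The imm32 operand occupies the last 4 bytes of the patched instruction,
    // so the literal offset is the bound label position minus that adjustment.
    constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

    uint32_t LiteralOffset(const PatchRecord& info) {
      return info.label_position - kLabelPositionToLiteralOffsetAdjustment;
    }
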
 
 void CodeGeneratorX86::MarkGCCard(Register temp,
@@ -3920,18 +4234,29 @@
 
 void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   kEmitCompilerReadBarrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
 
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    // The output overlaps in case of long: we don't want the low move to overwrite
-    // the object's location.
-    locations->SetOut(Location::RequiresRegister(),
-        (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
-                                                         : Location::kNoOutputOverlap);
+    // The output overlaps in case of long: we don't want the low move
+    // to overwrite the object's location.  Likewise, in the case of
+    // an object field get with read barriers enabled, we do not want
+    // the move to overwrite the object's location, as we need it to emit
+    // the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ?
+            Location::kOutputOverlap :
+            Location::kNoOutputOverlap);
   }
 
   if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
@@ -3947,7 +4272,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  Register base = locations->InAt(0).AsRegister<Register>();
+  Location base_loc = locations->InAt(0);
+  Register base = base_loc.AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   Primitive::Type field_type = field_info.GetFieldType();
@@ -4022,7 +4348,7 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
   }
 }
 
@@ -4043,16 +4369,16 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(1, Location::RegisterLocation(EAX));
   } else if (Primitive::IsFloatingPointType(field_type)) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
+    if (is_volatile && field_type == Primitive::kPrimDouble) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
+  } else if (is_volatile && field_type == Primitive::kPrimLong) {
+    // In order to satisfy the semantics of volatile, this must be a single instruction store.
     locations->SetInAt(1, Location::RequiresRegister());
-  }
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
-  } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+
     // A 64-bit value can be atomically written to an address with movsd and an XMM register.
     // We need two XMM registers because there's no easier way to (bit) copy a register pair
     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
@@ -4060,6 +4386,15 @@
     // isolated cases when we need this it isn't worth adding the extra complexity.
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+
+    if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
+      // Ensure the card is in a byte register.
+      locations->AddTemp(Location::RegisterLocation(ECX));
+    }
   }
 }
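
The two FP temps requested for a volatile long store implement the
movd/movd/punpckldq/movsd dance: each 32-bit half is copied into an XMM
register, the halves are interleaved, and the 64-bit result is written with a
single instruction, which is what makes the store atomic. The same technique
in standalone SSE2 intrinsics (a sketch of the technique, not ART's code;
assumes an SSE2 target):

    #include <cstdint>
    #include <emmintrin.h>  // SSE2

    // Store a 64-bit value assembled from two 32-bit halves with a single
    // 64-bit store, so the write can never be observed half-done.
    void AtomicStore64(void* addr, uint32_t low, uint32_t high) {
      __m128i xmm_lo = _mm_cvtsi32_si128(static_cast<int32_t>(low));   // movd
      __m128i xmm_hi = _mm_cvtsi32_si128(static_cast<int32_t>(high));  // movd
      __m128i pair = _mm_unpacklo_epi32(xmm_lo, xmm_hi);               // punpckldq
      _mm_storel_epi64(static_cast<__m128i*>(addr), pair);             // one 64-bit store
    }
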
 
@@ -4081,6 +4416,8 @@
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
@@ -4090,7 +4427,12 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<Register>());
+      if (value.IsConstant()) {
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<Register>());
+      }
       break;
     }
 
@@ -4105,6 +4447,9 @@
         __ movl(temp, value.AsRegister<Register>());
         __ PoisonHeapReference(temp);
         __ movl(Address(base, offset), temp);
+      } else if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
       } else {
         __ movl(Address(base, offset), value.AsRegister<Register>());
       }
@@ -4120,21 +4465,40 @@
         __ punpckldq(temp1, temp2);
         __ movsd(Address(base, offset), temp1);
         codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
       } else {
         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
       }
+      maybe_record_implicit_null_check_done = true;
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -4143,8 +4507,7 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (!maybe_record_implicit_null_check_done) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -4313,24 +4676,35 @@
 }
 
 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    // The output overlaps in case of long: we don't want the low move to overwrite
-    // the array's location.
-    locations->SetOut(Location::RequiresRegister(),
-        (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
-                                                         : Location::kNoOutputOverlap);
+    // The output overlaps in case of long: we don't want the low move
+    // to overwrite the array's location.  Likewise, in the case of an
+    // object array get with read barriers enabled, we do not want the
+    // move to overwrite the array's location, as we need it to emit
+    // the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ?
+            Location::kOutputOverlap :
+            Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
 
   Primitive::Type type = instruction->GetType();
@@ -4385,6 +4759,9 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
@@ -4449,8 +4826,17 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    Register out = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(out);
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+    }
   }
 }
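
MaybeGenerateReadBarrier, used for the kPrimNot cases above, conceptually
post-processes a freshly loaded heap reference when read barriers are enabled
and is a no-op otherwise. Below is a behavioural sketch; the helper and flag
names are illustrative, and the identity slow path stands in for whatever the
real barrier does (e.g. returning a to-space pointer).

    #include <cstdint>

    constexpr bool kEmitReadBarrier = true;  // stands in for kEmitCompilerReadBarrier

    struct Object {};

    // Placeholder for the runtime's processing of a reference read; the real
    // barrier may return an updated (to-space) pointer.
    Object* SlowPathProcessReference(Object* ref, Object* holder, uint32_t offset) {
      (void)holder; (void)offset;
      return ref;
    }

    // A reference field/element load followed by an optional read barrier:
    // the loaded value is only usable once the barrier has run.
    Object* LoadReference(Object* holder, uint32_t offset) {
      Object* ref = *reinterpret_cast<Object**>(
          reinterpret_cast<uint8_t*>(holder) + offset);       // the movl above
      if (kEmitReadBarrier) {
        ref = SlowPathProcessReference(ref, holder, offset);  // slow-path fixup
      }
      return ref;
    }
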
 
@@ -4461,14 +4847,18 @@
   // optimization.
 
   Primitive::Type value_type = instruction->GetComponentType();
+
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-
-  bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
 
   bool is_byte_type = (value_type == Primitive::kPrimBoolean)
       || (value_type == Primitive::kPrimByte);
@@ -4481,7 +4871,7 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
   } else if (Primitive::IsFloatingPointType(value_type)) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
@@ -4495,14 +4885,15 @@
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register array = locations->InAt(0).AsRegister<Register>();
+  Location array_loc = locations->InAt(0);
+  Register array = array_loc.AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -4542,6 +4933,7 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+
       if (!value.IsRegister()) {
         // Just setting null.
         DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4549,7 +4941,7 @@
         __ movl(address, Immediate(0));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         DCHECK(!needs_write_barrier);
-        DCHECK(!may_need_runtime_call);
+        DCHECK(!may_need_runtime_call_for_type_check);
         break;
       }
 
@@ -4558,7 +4950,7 @@
       NearLabel done, not_null, do_put;
       SlowPathCode* slow_path = nullptr;
       Register temp = locations->GetTemp(0).AsRegister<Register>();
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -4570,22 +4962,62 @@
           __ Bind(&not_null);
         }
 
-        __ movl(temp, Address(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp);
-        __ movl(temp, Address(temp, component_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ cmpl(temp, Address(register_value, class_offset));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          __ j(kEqual, &do_put);
-          __ MaybeUnpoisonHeapReference(temp);
-          __ movl(temp, Address(temp, super_offset));
-          // No need to unpoison, we're comparing against null..
-          __ testl(temp, temp);
-          __ j(kNotEqual, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ movl(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ movl(temp, Address(temp, component_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+          //   __ movl(temp2, Address(register_value, class_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+          //
+          //   __ cmpl(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ jmp(slow_path->GetEntryLabel());
         } else {
-          __ j(kNotEqual, slow_path->GetEntryLabel());
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ movl(temp, Address(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ movl(temp, Address(temp, component_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor the object reference in `register_value->klass`, as
+          // we are comparing two poisoned references.
+          __ cmpl(temp, Address(register_value, class_offset));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            __ j(kEqual, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ movl(temp, Address(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ testl(temp, temp);
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+          }
         }
       }
 
@@ -4596,7 +5028,7 @@
       } else {
         __ movl(address, register_value);
       }
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
 
@@ -4611,6 +5043,7 @@
 
       break;
     }
+
     case Primitive::kPrimInt: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Address address = index.IsConstant()
@@ -4667,8 +5100,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4677,8 +5116,19 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        Address address_hi = index.IsConstant() ?
+            Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                           offset + kX86WordSize) :
+            Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        __ movl(address, Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(address_hi, Immediate(High32Bits(v)));
+      }
       break;
     }
 
@@ -5064,7 +5514,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(EAX));
+      Location::RegisterLocation(EAX),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
@@ -5075,31 +5526,60 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
+
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    __ movl(out, Address(
-        current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
-    __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
-
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ leal(out, Address(current_method, declaring_class_offset));
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
     } else {
-      __ Bind(slow_path->GetExitLabel());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ movl(out, Address(current_method, declaring_class_offset));
+    }
+  } else {
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+    __ movl(out, Address(current_method,
+                         ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ leal(out, Address(out, cache_offset));
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ movl(out, Address(out, cache_offset));
+    }
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
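
GC roots take a different shape from heap references: with read barriers
enabled, the code materializes the address of the root slot (leal) and lets
the root barrier perform the load, possibly returning an updated pointer;
without them, it loads the root directly (movl). Schematically, with
placeholder names:

    constexpr bool kEmitReadBarrier = true;

    struct Class {};
    using GcRoot = Class*;  // simplified: a slot holding a Class pointer

    // Placeholder for GenerateReadBarrierForRoot: given the slot's address,
    // re-read the root and possibly return an updated pointer.
    Class* ReadBarrierForRoot(GcRoot* slot) { return *slot; }

    Class* LoadClassRoot(GcRoot* declaring_class_slot) {
      if (kEmitReadBarrier) {
        GcRoot* addr = declaring_class_slot;  // leal: compute &root
        return ReadBarrierForRoot(addr);      // out = out->Read()
      }
      return *declaring_class_slot;           // movl: plain load
    }
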
@@ -5143,12 +5623,35 @@
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   Register current_method = locations->InAt(0).AsRegister<Register>();
-  __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ leal(out, Address(current_method, declaring_class_offset));
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ movl(out, Address(current_method, declaring_class_offset));
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-  __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ leal(out, Address(out, cache_offset));
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ movl(out, Address(out, cache_offset));
+  }
+
   __ testl(out, out);
   __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
@@ -5188,44 +5691,49 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86 uses this register too.
-    locations->SetOut(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(EAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this "out" register too.
+  locations->SetOut(Location::RequiresRegister());
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5240,15 +5748,9 @@
     __ j(kEqual, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ movl(target, Address(obj, class_offset));
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ movl(out, Address(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -5265,13 +5767,23 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5288,6 +5800,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
@@ -5299,8 +5812,17 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
+      Location temp_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5312,6 +5834,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       NearLabel exact_check;
@@ -5322,9 +5845,18 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = temp_loc.AsRegister<Register>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ movl(out, Address(out, component_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5335,6 +5867,7 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
@@ -5343,8 +5876,8 @@
         __ cmpl(out, Address(ESP, cls.GetStackIndex()));
       }
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                                    /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ movl(out, Immediate(1));
@@ -5353,13 +5886,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that the only call made here is on a slow path, and for
+      // the unresolved and interface check cases that slow path is
+      // always taken.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers to the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                                    /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ jmp(&done);
       }
@@ -5384,75 +5929,73 @@
 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kInterfaceCheck:
-    case TypeCheckKind::kUnresolvedCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86 uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register temp = locations->WillCall()
-      ? kNoRegister
-      : locations->GetTemp(0).AsRegister<Register>();
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+                                                        is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
-  NearLabel done, abstract_entry;
+  NearLabel done;
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
     __ testl(obj, obj);
     __ j(kEqual, &done);
   }
 
-  if (locations->WillCall()) {
-    __ movl(obj, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ movl(temp, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ movl(temp, Address(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
@@ -5463,19 +6006,44 @@
       }
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, success;
+      NearLabel loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
       __ testl(temp, temp);
-      // Jump to the slow path to throw the exception.
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move the object's class back into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -5485,6 +6053,7 @@
       __ j(kNotEqual, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -5496,16 +6065,39 @@
         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
-      // Jump to the slow path to throw the exception.
-      __ jmp(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move the object's class back into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -5513,29 +6105,67 @@
         __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = temp2_loc.AsRegister<Register>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ movl(temp, Address(temp, component_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
       __ testl(temp, temp);
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move the object's class back into `temp`,
+      // as it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kEqual, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers to the inputs of this HCheckCast instruction
+      // (following the runtime calling convention), which might be
+      // cluttered by the potential first read barrier emission at the
+      // beginning of this method.
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5551,6 +6181,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
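The CheckEntrypointTypes<> calls added throughout this patch are compile-time assertions that the quick entrypoint being invoked matches the signature the call site assumes; they emit no code. A minimal sketch of the idea, with hypothetical names (this is not ART's actual template machinery):

    #include <type_traits>

    // Hypothetical sketch only: declare each entrypoint's C signature once...
    enum Entrypoint { kLockObject, kUnlockObject };
    template <Entrypoint kEp> struct EntrypointSignature;
    template <> struct EntrypointSignature<kLockObject>   { using type = void(void*); };
    template <> struct EntrypointSignature<kUnlockObject> { using type = void(void*); };

    // ...then fail the build at any call site whose expectation disagrees.
    template <Entrypoint kEp, typename ReturnType, typename... ArgTypes>
    void CheckEntrypointSignature() {
      static_assert(
          std::is_same<typename EntrypointSignature<kEp>::type, ReturnType(ArgTypes...)>::value,
          "entrypoint invoked with a mismatched signature");
    }

    // Usage, mirroring the call above: CheckEntrypointSignature<kLockObject, void, void*>();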
 
 void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5686,6 +6321,82 @@
   }
 }
 
+void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
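A representative call-site pattern for MaybeGenerateReadBarrier, lifted from the InstanceOf/CheckCast visitors above: emit the reference load, then let the helper pick between its three behaviors.

    // /* HeapReference<Class> */ temp = obj->klass_
    __ movl(temp, Address(obj, class_offset));
    // kEmitCompilerReadBarrier: branches to a slow path that calls
    //   artReadBarrierSlow(ref, obj, offset) and writes the result to `temp_loc`.
    // kPoisonHeapReferences only: emits UnpoisonHeapReference(temp).
    // Neither: emits nothing.
    codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);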
+
+void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                  Location out,
+                                                  Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
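For the root variant, a hypothetical call site (a sketch only; no GC-root load appears in this hunk, and the exact addressing is assumed): since artReadBarrierForRootSlow takes a GcRoot<mirror::Object>*, the `root` location must hold the address of the root slot, not its contents.

    // Sketch under the above assumption: materialize the root's address,
    // then let the runtime return the up-to-date reference into `out`.
    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
    __ leal(out, Address(current_method, declaring_class_offset));
    codegen_->GenerateReadBarrierForRoot(instruction, out_loc, out_loc);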
+
 void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ac3d06c..064051c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -227,14 +227,12 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
-  void GenerateCompareTestAndBranch(HIf* if_inst,
-                                    HCondition* condition,
+                             Label* false_target);
+  void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
+                                    Label* false_target);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
@@ -397,7 +395,64 @@
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+  struct PcRelativeDexCacheAccessInfo {
+    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
+        : target_dex_file(dex_file), element_offset(element_off), label() { }
+
+    const DexFile& target_dex_file;
+    uint32_t element_offset;
+    // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset.
+    Label label;
+  };
+
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
@@ -410,6 +465,8 @@
   // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> method_patches_;
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+  // PC-relative DexCache access info.
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
 
   // Offset to the start of the constant area in the assembled code.
   // Used for fixups to the constant area.
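The `index` adjustment described above is plain address arithmetic; the x86-64 slow path further down performs it with a shll/AddImmediate pair. A self-contained sketch of the computation, where the 12-byte data offset is illustrative:

    #include <cstdint>

    // TIMES_4 is the x86 scale for 4-byte heap references, i.e. a shift by 2.
    constexpr uint32_t kScaleTimes4 = 2;
    constexpr uint32_t AdjustedOffset(uint32_t data_offset, uint32_t index) {
      return data_offset + (index << kScaleTimes4);
    }
    // e.g. element 5 of a reference array whose payload starts at offset 12:
    static_assert(AdjustedOffset(12, 5) == 32, "offset handed to artReadBarrierSlow");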
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5218d70..4618be9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -34,6 +34,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace x86_64 {
 
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -52,16 +55,17 @@
   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -78,16 +82,17 @@
   explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
       SaveLiveRegisters(codegen, instruction_->GetLocations());
     }
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -139,18 +144,19 @@
       : instruction_(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
     RestoreLiveRegisters(codegen, instruction_->GetLocations());
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
-      __ jmp(x64_codegen->GetLabelOf(successor_));
+      __ jmp(x86_64_codegen->GetLabelOf(successor_));
     }
   }
 
@@ -180,7 +186,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
@@ -196,8 +202,11 @@
         locations->InAt(1),
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
         Primitive::kPrimInt);
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
-                               instruction_, instruction_->GetDexPc(), this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
   }
 
   bool IsFatal() const OVERRIDE { return true; }
@@ -222,22 +231,30 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = at_->GetLocations();
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
 
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
-    x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
-                                          : QUICK_ENTRY_POINT(pInitializeType),
-                                          at_, dex_pc_, this);
+    x86_64_codegen->InvokeRuntime(do_clinit_ ?
+                                      QUICK_ENTRY_POINT(pInitializeStaticStorage) :
+                                      QUICK_ENTRY_POINT(pInitializeType),
+                                  at_,
+                                  dex_pc_,
+                                  this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
 
     Location out = locations->Out();
     // Move the class to the desired location.
     if (out.IsValid()) {
       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
-      x64_codegen->Move(out, Location::RegisterLocation(RAX));
+      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -271,18 +288,19 @@
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
             Immediate(instruction_->GetStringIndex()));
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
-    x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -308,18 +326,9 @@
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-      CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ movl(temp, Address(obj, class_offset));
-      __ MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -336,21 +345,24 @@
         Primitive::kPrimNot);
 
     if (instruction_->IsInstanceOf()) {
-      x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                                 instruction_,
-                                 dex_pc,
-                                 this);
+      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+                                    instruction_,
+                                    dex_pc,
+                                    this);
+      CheckEntrypointTypes<
+          kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                                 instruction_,
-                                 dex_pc,
-                                 this);
+      x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+                                    instruction_,
+                                    dex_pc,
+                                    this);
+      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
     }
 
     if (!is_fatal_) {
       if (instruction_->IsInstanceOf()) {
-        x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
       }
 
       RestoreLiveRegisters(codegen, locations);
@@ -375,15 +387,16 @@
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
     DCHECK(instruction_->IsDeoptimize());
     HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
-                               deoptimize,
-                               deoptimize->GetDexPc(),
-                               this);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  deoptimize,
+                                  deoptimize->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -421,11 +434,12 @@
         nullptr);
     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
 
-    CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
-                               instruction_,
-                               instruction_->GetDexPc(),
-                               this);
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
     RestoreLiveRegisters(codegen, locations);
     __ jmp(GetExitLabel());
   }
@@ -438,6 +452,219 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
+                                            Location out,
+                                            Location ref,
+                                            Location obj,
+                                            uint32_t offset,
+                                            Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial
+    // object has been overwritten by (or after) the heap object
+    // reference load to be instrumented, e.g.:
+    //
+    //   __ movl(out, Address(out, offset));
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and the UnsafeGetObject /
+      // UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::x86_64::X86_64Assembler::shll and
+          // art::x86_64::X86_64Assembler::AddImmediate below), but it
+          // has not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
+          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the
+        // scale factor (2) cannot overflow in practice, as the
+        // runtime is unable to allocate object arrays with a size
+        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegister());
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
+    }
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierForHeapReferenceSlowPathX86_64";
+  }
+
+ private:
+  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(ref_.AsRegister<CpuRegister>().AsRegister());
+    size_t obj = static_cast<size_t>(obj_.AsRegister<CpuRegister>().AsRegister());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return static_cast<CpuRegister>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on x86-64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
+};
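On the HParallelMove above: the argument registers may alias the current homes of `ref_`, `obj_` and `index`, so sequential moves could clobber a source before it is read. A sketch of the hazard, assuming the usual RDI/RSI/RDX runtime argument order (register assignment illustrative):

    // If ref_ happens to live in RSI and obj_ in RDI, naive ordering loses obj_:
    //   movq %rsi, %rdi   // ref -> arg0, overwriting obj's only copy
    //   movq %rdi, %rsi   // now copies ref again instead of obj
    // The parallel move resolver detects such swaps/cycles and emits an
    // exchange, or routes one value through a scratch register, instead.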
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
+};
+
 #undef __
 #define __ down_cast<X86_64Assembler*>(GetAssembler())->
 
@@ -503,7 +730,7 @@
                     Address::Absolute(invoke->GetStringInitOffset(), true));
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
@@ -514,15 +741,15 @@
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                                  invoke->GetDexCacheArrayOffset());
       __ movq(temp.AsRegister<CpuRegister>(),
               Address::Absolute(kDummy32BitOffset, false /* no_rip */));
       // Bind the label at the end of the "movl" insn.
-      __ Bind(&pc_rel_dex_cache_patches_.back().label);
+      __ Bind(&pc_relative_dex_cache_patches_.back().label);
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
       CpuRegister reg = temp.AsRegister<CpuRegister>();
       if (current_method.IsRegister()) {
@@ -533,7 +760,7 @@
         method_reg = reg.AsRegister();
         __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
       }
-      // temp = temp->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ movq(reg,
               Address(CpuRegister(method_reg),
                       ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
@@ -575,13 +802,25 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though it
+  // may not do so in the future).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
@@ -593,28 +832,27 @@
 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      method_patches_.size() + relative_call_patches_.size() + pc_rel_dex_cache_patches_.size();
+      method_patches_.size() +
+      relative_call_patches_.size() +
+      pc_relative_dex_cache_patches_.size();
   linker_patches->reserve(size);
+  // The label points to the end of the "movl" insn but the literal offset for method
+  // patch needs to point to the embedded constant which occupies the last 4 bytes.
+  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
   for (const MethodPatchInfo<Label>& info : method_patches_) {
-    // The label points to the end of the "movl" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
                                                        info.target_method.dex_file,
                                                        info.target_method.dex_method_index));
   }
   for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
-    // The label points to the end of the "call" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
-    // The label points to the end of the "mov" instruction but the literal offset for method
-    // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
-    uint32_t literal_offset = info.label.Position() - 4;
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
                                                               &info.target_dex_file,
                                                               info.label.Position(),
@@ -673,9 +911,9 @@
 // Use a fake return address register to mimic Quick.
 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
-                const X86_64InstructionSetFeatures& isa_features,
-                const CompilerOptions& compiler_options,
-                OptimizingCompilerStats* stats)
+                                         const X86_64InstructionSetFeatures& isa_features,
+                                         const CompilerOptions& compiler_options,
+                                         OptimizingCompilerStats* stats)
       : CodeGenerator(graph,
                       kNumberOfCpuRegisters,
                       kNumberOfFloatRegisters,
@@ -695,7 +933,7 @@
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -729,7 +967,7 @@
       LOG(FATAL) << "Unreachable type " << type;
   }
 
-  return Location();
+  return Location::NoLocation();
 }
 
 void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
@@ -1083,26 +1321,19 @@
   __ j(X86_64FPCondition(cond->GetCondition()), true_label);
 }
 
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
-                                                                  HCondition* condition,
-                                                                  Label* true_target,
-                                                                  Label* false_target,
-                                                                  Label* always_true_target) {
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+                                                                  Label* true_target_in,
+                                                                  Label* false_target_in) {
+  // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+  Label fallthrough_target;
+  Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+  Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
   LocationSummary* locations = condition->GetLocations();
   Location left = locations->InAt(0);
   Location right = locations->InAt(1);
 
-  // We don't want true_target as a nullptr.
-  if (true_target == nullptr) {
-    true_target = always_true_target;
-  }
-  bool falls_through = (false_target == nullptr);
-
-  // FP compares don't like null false_targets.
-  if (false_target == nullptr) {
-    false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  }
-
   Primitive::Type type = condition->InputAt(0)->GetType();
   switch (type) {
     case Primitive::kPrimLong: {
@@ -1161,117 +1392,140 @@
       LOG(FATAL) << "Unexpected condition type " << type;
   }
 
-  if (!falls_through) {
+  if (false_target != &fallthrough_target) {
     __ jmp(false_target);
   }
+
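+  // Bind the shared fallthrough label only if one of the branches above
+  // actually targeted it in place of a null target.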
+  if (fallthrough_target.IsLinked()) {
+    __ Bind(&fallthrough_target);
+  }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
-                                                           Label* true_target,
-                                                           Label* false_target,
-                                                           Label* always_true_target) {
-  HInstruction* cond = instruction->InputAt(0);
-  if (cond->IsIntConstant()) {
-    // Constant condition, statically compared against 1.
-    int32_t cond_value = cond->AsIntConstant()->GetValue();
-    if (cond_value == 1) {
-      if (always_true_target != nullptr) {
-        __ jmp(always_true_target);
-      }
-      return;
-    } else {
-      DCHECK_EQ(cond_value, 0);
-    }
-  } else {
-    bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
-    // Moves do not affect the eflags register, so if the condition is
-    // evaluated just before the if, we don't need to evaluate it
-    // again.  We can't use the eflags on FP conditions if they are
-    // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
-        && !Primitive::IsFloatingPointType(type);
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+  // Moves may affect the eflags register (moving zero uses xorl), so the
+  // EFLAGS can only be relied upon when `cond` is evaluated immediately
+  // before `branch`. We can't use the eflags on FP conditions if they are
+  // materialized, due to the complex branching they require.
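+  // For example, when an integer condition is evaluated by a cmpl immediately
+  // followed by its HIf, GenerateTestAndBranch can emit a single jcc consuming
+  // the still-live EFLAGS instead of re-testing the materialized boolean.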
+  return cond->IsCondition() &&
+         cond->GetNext() == branch &&
+         !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
+}
 
-    if (is_materialized) {
-      if (!eflags_set) {
-        // Materialized condition, compare against 0.
-        Location lhs = instruction->GetLocations()->InAt(0);
-        if (lhs.IsRegister()) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
-                  Immediate(0));
-        }
-        __ j(kNotEqual, true_target);
+void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
+                                                           size_t condition_input_index,
+                                                           Label* true_target,
+                                                           Label* false_target) {
+  HInstruction* cond = instruction->InputAt(condition_input_index);
+
+  if (true_target == nullptr && false_target == nullptr) {
+    // Nothing to do. The code always falls through.
+    return;
+  } else if (cond->IsIntConstant()) {
+    // Constant condition, statically compared against 1.
+    if (cond->AsIntConstant()->IsOne()) {
+      if (true_target != nullptr) {
+        __ jmp(true_target);
+      }
+    } else {
+      DCHECK(cond->AsIntConstant()->IsZero());
+      if (false_target != nullptr) {
+        __ jmp(false_target);
+      }
+    }
+    return;
+  }
+
+  // The following code generates these patterns:
+  //  (1) true_target == nullptr && false_target != nullptr
+  //        - opposite condition true => branch to false_target
+  //  (2) true_target != nullptr && false_target == nullptr
+  //        - condition true => branch to true_target
+  //  (3) true_target != nullptr && false_target != nullptr
+  //        - condition true => branch to true_target
+  //        - branch to false_target
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    if (AreEflagsSetFrom(cond, instruction)) {
+      if (true_target == nullptr) {
+        __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
       } else {
         __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
       }
     } else {
-      // Condition has not been materialized, use its inputs as the
-      // comparison and its condition as the branch condition.
-
-      // Is this a long or FP comparison that has been folded into the HCondition?
-      if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-        // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
-                                     true_target, false_target, always_true_target);
-        return;
-      }
-
-      Location lhs = cond->GetLocations()->InAt(0);
-      Location rhs = cond->GetLocations()->InAt(1);
-      if (rhs.IsRegister()) {
-        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
-      } else if (rhs.IsConstant()) {
-        int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
-        if (constant == 0) {
-          __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
-        } else {
-          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
-        }
+      // Materialized condition, compare against 0.
+      Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+      if (lhs.IsRegister()) {
+        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
       } else {
-        __ cmpl(lhs.AsRegister<CpuRegister>(),
-                Address(CpuRegister(RSP), rhs.GetStackIndex()));
+        __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
       }
-      __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
+      if (true_target == nullptr) {
+        __ j(kEqual, false_target);
+      } else {
+        __ j(kNotEqual, true_target);
+      }
+    }
+  } else {
+    // Condition has not been materialized, use its inputs as the
+    // comparison and its condition as the branch condition.
+    HCondition* condition = cond->AsCondition();
+
+    // If this is a long or FP comparison that has been folded into
+    // the HCondition, generate the comparison directly.
+    Primitive::Type type = condition->InputAt(0)->GetType();
+    if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      return;
+    }
+
+    Location lhs = condition->GetLocations()->InAt(0);
+    Location rhs = condition->GetLocations()->InAt(1);
+    if (rhs.IsRegister()) {
+      __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+    } else if (rhs.IsConstant()) {
+      int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+      if (constant == 0) {
+        __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+      } else {
+        __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+      }
+    } else {
+      __ cmpl(lhs.AsRegister<CpuRegister>(),
+              Address(CpuRegister(RSP), rhs.GetStackIndex()));
+    }
+    if (true_target == nullptr) {
+      __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
+    } else {
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
     }
   }
-  if (false_target != nullptr) {
+
+  // If neither branch falls through (case 3), the conditional branch to `true_target`
+  // was already emitted (case 2) and we need to emit a jump to `false_target`.
+  if (true_target != nullptr && false_target != nullptr) {
     __ jmp(false_target);
   }
 }
 
 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
-  HInstruction* cond = if_instr->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
-  Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
-  Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
-  Label* always_true_target = true_target;
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfTrueSuccessor())) {
-    always_true_target = nullptr;
-  }
-  if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
-                                if_instr->IfFalseSuccessor())) {
-    false_target = nullptr;
-  }
-  GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
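+  // A null target tells GenerateTestAndBranch that the corresponding
+  // successor is the next block in the code layout, i.e. a fallthrough.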
+  HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+  HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+  Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
+  GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
   LocationSummary* locations = new (GetGraph()->GetArena())
       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
-  HInstruction* cond = deoptimize->InputAt(0);
-  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
     locations->SetInAt(0, Location::Any());
   }
 }
@@ -1280,8 +1534,10 @@
   SlowPathCode* slow_path = new (GetGraph()->GetArena())
       DeoptimizationSlowPathX86_64(deoptimize);
   codegen_->AddSlowPath(slow_path);
-  Label* slow_path_entry = slow_path->GetEntryLabel();
-  GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
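+  // This is pattern (2) of GenerateTestAndBranch: branch to the
+  // deoptimization slow path when the condition holds, fall through otherwise.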
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
 }
 
 void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
@@ -1819,7 +2075,7 @@
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
   }
-  return Location();
+  return Location::NoLocation();
 }
 
 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
@@ -1890,7 +2146,6 @@
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -1903,31 +2158,41 @@
 
 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+  LocationSummary* locations = invoke->GetLocations();
+  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
-  // Set the hidden argument.
-  CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+  // Set the hidden argument. It is safe to do so here, as RAX
+  // won't be modified thereafter, before the `call` instruction.
+  DCHECK_EQ(RAX, hidden_reg.AsRegister());
   codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ movl(temp, Address(temp, class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference, and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // future collectors may not provide that guarantee).
   __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+  __ call(Address(temp,
+                  ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2562,7 +2827,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2682,7 +2947,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -3517,22 +3782,19 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
   InvokeRuntimeCallingConvention calling_convention;
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
-  InvokeRuntimeCallingConvention calling_convention;
-  codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
-                           instruction->GetTypeIndex());
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
-
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3551,13 +3813,13 @@
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
-
   // Note: if heap poisoning is enabled, the entry point takes care
   // of poisoning the reference.
   codegen_->InvokeRuntime(instruction->GetEntrypoint(),
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
 
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -3669,13 +3931,23 @@
 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the move to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
@@ -3684,7 +3956,8 @@
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
 
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+  Location base_loc = locations->InAt(0);
+  CpuRegister base = base_loc.AsRegister<CpuRegister>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
   Primitive::Type field_type = field_info.GetFieldType();
@@ -3744,7 +4017,7 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
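+    // When read barriers are enabled, this emits a read barrier for the
+    // reference load above; otherwise it simply unpoisons `out` if heap
+    // poisoning is in use.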
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
   }
 }
 
@@ -3755,14 +4028,25 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = field_info.GetFieldType();
+  bool is_volatile = field_info.IsVolatile();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
   } else {
-    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    }
   }
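+  // Note: a 64-bit constant that does not fit in an imm32 cannot be stored by
+  // a single x86-64 instruction and would be split into two 32-bit moves (see
+  // MoveInt64ToAddress), hence the Int32Constant restriction in the volatile
+  // cases above.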
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
@@ -3790,11 +4074,13 @@
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
         __ movb(Address(base, offset), Immediate(v));
       } else {
         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3805,7 +4091,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
         __ movw(Address(base, offset), Immediate(v));
       } else {
         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3838,9 +4124,11 @@
     case Primitive::kPrimLong: {
       if (value.IsConstant()) {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(Address(base, offset), Immediate(v_32));
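+        // MoveInt64ToAddress is expected to emit a single movq when `v` fits
+        // in an imm32, and two 32-bit moves otherwise; `instruction` is passed
+        // so the helper can record the implicit null check itself.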
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
       } else {
         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
       }
@@ -3848,12 +4136,28 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -3862,7 +4166,9 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  if (!maybe_record_implicit_null_check_done) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
 
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
@@ -4029,20 +4335,31 @@
 }
 
 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object array get when read barriers
+    // are enabled: we do not want the move to overwrite the array's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Primitive::Type type = instruction->GetType();
 
@@ -4097,8 +4414,9 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-                    "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
@@ -4153,8 +4471,17 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
   if (type == Primitive::kPrimNot) {
-    CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-    __ MaybeUnpoisonHeapReference(out);
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+    }
   }
 }
 
@@ -4164,37 +4491,41 @@
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   bool may_need_runtime_call = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
 
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  locations->SetInAt(2, Location::RequiresRegister());
-  if (value_type == Primitive::kPrimLong) {
-    locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
-  } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
 
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+
+    // This first temporary register is possibly used for heap
+    // reference poisoning and/or read barrier emission too.
+    locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>();
+  Location array_loc = locations->InAt(0);
+  CpuRegister array = array_loc.AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4238,6 +4569,7 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+
       if (!value.IsRegister()) {
         // Just setting null.
         DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4266,22 +4598,62 @@
           __ Bind(&not_null);
         }
 
-        __ movl(temp, Address(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ MaybeUnpoisonHeapReference(temp);
-        __ movl(temp, Address(temp, component_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ cmpl(temp, Address(register_value, class_offset));
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          __ j(kEqual, &do_put);
-          __ MaybeUnpoisonHeapReference(temp);
-          __ movl(temp, Address(temp, super_offset));
-          // No need to unpoison the result, we're comparing against null.
-          __ testl(temp, temp);
-          __ j(kNotEqual, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ movl(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ movl(temp, Address(temp, component_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = register_value->klass_
+          //   __ movl(temp2, Address(register_value, class_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+          //
+          //   __ cmpl(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ jmp(slow_path->GetEntryLabel());
         } else {
-          __ j(kNotEqual, slow_path->GetEntryLabel());
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ movl(temp, Address(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ movl(temp, Address(temp, component_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor the object reference in `register_value->klass`, as
+          // we are comparing two poisoned references.
+          __ cmpl(temp, Address(register_value, class_offset));
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            __ j(kEqual, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            __ MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ movl(temp, Address(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ testl(temp, temp);
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ j(kNotEqual, slow_path->GetEntryLabel());
+          }
         }
       }
 
@@ -4307,6 +4679,7 @@
 
       break;
     }
+
     case Primitive::kPrimInt: {
       uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
       Address address = index.IsConstant()
@@ -4330,13 +4703,15 @@
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
       if (value.IsRegister()) {
         __ movq(address, value.AsRegister<CpuRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(address, Immediate(v_32));
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
       }
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4345,8 +4720,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
@@ -4356,9 +4737,18 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
+      }
       break;
     }
 
@@ -4739,7 +5129,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-      Location::RegisterLocation(RAX));
+      Location::RegisterLocation(RAX),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -4750,31 +5141,58 @@
                             cls,
                             cls->GetDexPc(),
                             nullptr);
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     return;
   }
 
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  } else {
-    DCHECK(cls->CanCallRuntime());
-    __ movq(out, Address(
-        current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
-    __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
-
-    SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
-        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
-    codegen_->AddSlowPath(slow_path);
-    __ testl(out, out);
-    __ j(kEqual, slow_path->GetEntryLabel());
-    if (cls->MustGenerateClinitCheck()) {
-      GenerateClassInitializationCheck(slow_path, out);
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ leaq(out, Address(current_method, declaring_class_offset));
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
     } else {
-      __ Bind(slow_path->GetExitLabel());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ movl(out, Address(current_method, declaring_class_offset));
+    }
+  } else {
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+    __ movq(out, Address(current_method,
+                         ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ leaq(out, Address(out, cache_offset));
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ movl(out, Address(out, cache_offset));
+    }
+
+    if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+      DCHECK(cls->CanCallRuntime());
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+          cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+      codegen_->AddSlowPath(slow_path);
+      if (!cls->IsInDexCache()) {
+        __ testl(out, out);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+      if (cls->MustGenerateClinitCheck()) {
+        GenerateClassInitializationCheck(slow_path, out);
+      } else {
+        __ Bind(slow_path->GetExitLabel());
+      }
     }
   }
 }
@@ -4809,12 +5227,35 @@
   codegen_->AddSlowPath(slow_path);
 
   LocationSummary* locations = load->GetLocations();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
-  __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
-  __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ leaq(out, Address(current_method, declaring_class_offset));
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ movl(out, Address(current_method, declaring_class_offset));
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+  __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ leaq(out, Address(out, cache_offset));
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ movl(out, Address(out, cache_offset));
+  }
+
   __ testl(out, out);
   __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
@@ -4854,44 +5295,49 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86_64 uses this register too.
-    locations->SetOut(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(Location::RegisterLocation(RAX));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
+  locations->SetOut(Location::RequiresRegister());
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
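+    // The temporary is used to save a copy of the reference before it is
+    // overwritten, as the read barrier in the checks below needs the
+    // original holder.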
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4906,15 +5352,9 @@
     __ j(kEqual, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ movl(target, Address(obj, class_offset));
-  __ MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ movl(out, Address(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -4936,13 +5376,23 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -4959,6 +5409,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
@@ -4970,8 +5421,17 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &success);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ movl(out, Address(out, super_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -4983,6 +5443,7 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       NearLabel exact_check;
@@ -4993,9 +5454,18 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+        __ movl(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ movl(out, Address(out, component_offset));
-      __ MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5006,6 +5476,7 @@
       __ jmp(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5014,8 +5485,8 @@
         __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                                       /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ movl(out, Immediate(1));
@@ -5024,13 +5495,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we formally only call on the slow path, but for the
+      // unresolved and interface check cases we always go into that
+      // slow path.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers to the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), and those registers might be clobbered by the
+      // read barrier potentially emitted at the beginning of this
+      // method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ jmp(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ jmp(&done);
       }
@@ -5055,58 +5538,60 @@
 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
-
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::Any());
-    // Note that TypeCheckSlowPathX86_64 uses this register too.
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::Any());
+  // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
     locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister temp = locations->WillCall()
-      ? CpuRegister(kNoRegister)
-      : locations->GetTemp(0).AsRegister<CpuRegister>();
-
+  Location temp_loc = locations->GetTemp(0);
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCode* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCode* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+                                                           is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   NearLabel done;
   // Avoid null check if we know obj is not null.
@@ -5115,15 +5600,11 @@
     __ j(kEqual, &done);
   }
 
-  if (locations->WillCall()) {
-    __ movl(obj, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ movl(temp, Address(obj, class_offset));
-    __ MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ movl(temp, Address(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       if (cls.IsRegister()) {
@@ -5134,19 +5615,44 @@
       }
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop;
+      NearLabel loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
       __ testl(temp, temp);
-      // Jump to the slow path to throw the exception.
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move back the object's class into `temp`, as
+      // it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5156,6 +5662,7 @@
       __ j(kNotEqual, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
@@ -5167,16 +5674,39 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ movl(temp, Address(temp, super_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back to the beginning of the loop.
       __ testl(temp, temp);
       __ j(kNotEqual, &loop);
-      // Jump to the slow path to throw the exception.
-      __ jmp(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move back the object's class into `temp`, as
+      // it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5184,29 +5714,67 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kEqual, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+        __ movl(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ movl(temp, Address(temp, component_offset));
-      __ MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
       __ testl(temp, temp);
-      __ j(kEqual, slow_path->GetEntryLabel());
+      __ j(kNotEqual, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // Before doing so, move back the object's class into `temp`, as
+      // it has been overwritten in the meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ j(kEqual, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ movl(temp, Address(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved &
+      // interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers to the inputs of this HCheckCast
+      // instruction (following the runtime calling convention), and
+      // those registers might be clobbered by the read barrier
+      // potentially emitted at the beginning of this method.
+      __ jmp(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5222,6 +5790,11 @@
                           instruction,
                           instruction->GetDexPc(),
                           nullptr);
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
 }
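CheckEntrypointTypes is a compile-time-only assertion that the quick entrypoint is invoked with the expected C signature; it emits no code. A hedged sketch of its shape (the exact declaration lives in the runtime's entrypoint headers):

    // Assumption: declared roughly like this, with one specialization per
    // QuickEntrypointEnum value recording that entrypoint's signature.
    template <QuickEntrypointEnum kEntrypoint, typename ReturnType, typename... ArgTypes>
    void CheckEntrypointTypes();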
 
 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5339,6 +5912,82 @@
   }
 }
 
+void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
+                                              Location out,
+                                              Location ref,
+                                              Location obj,
+                                              uint32_t offset,
+                                              Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCode* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                   Location out,
+                                                   Location ref,
+                                                   Location obj,
+                                                   uint32_t offset,
+                                                   Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
+  }
+}
+
+void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                     Location out,
+                                                     Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ jmp(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
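Both generators above are slow-path-only for now: an unconditional jmp into the slow path, with the exit label bound immediately after, so execution rejoins straight-line code on return. The TODOs describe the intended Baker fast path; a C++-flavored sketch under the assumptions stated in the comments (gray bit in the lock word, a Mark() entrypoint), with illustrative accessor names:

    // Sketch of the intended Baker read barrier fast path; GetLockWord,
    // GetFieldObject and Mark are illustrative assumptions, not ART APIs.
    mirror::Object* ReadWithBakerBarrier(mirror::Object* obj, uint32_t offset) {
      bool is_gray = (obj->GetLockWord() & kReadBarrierMask) != 0;
      // A load fence (or an artificial address dependence) must order the
      // lock word load before the reference load.
      mirror::Object* ref = obj->GetFieldObject(offset);
      if (is_gray) {
        ref = Mark(ref);  // slow path: mark/forward the reference
      }
      return ref;
    }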
+
 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
   // Nothing to do, this should be removed during prepare for register allocator.
   LOG(FATAL) << "Unreachable";
@@ -5564,6 +6213,24 @@
   return Address::RIP(table_fixup);
 }
 
+void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
+                                             const Address& addr_high,
+                                             int64_t v,
+                                             HInstruction* instruction) {
+  if (IsInt<32>(v)) {
+    int32_t v_32 = v;
+    __ movq(addr_low, Immediate(v_32));
+    MaybeRecordImplicitNullCheck(instruction);
+  } else {
+    // Didn't fit in a 32-bit signed immediate.  Do it in pieces.
+    int32_t low_v = Low32Bits(v);
+    int32_t high_v = High32Bits(v);
+    __ movl(addr_low, Immediate(low_v));
+    MaybeRecordImplicitNullCheck(instruction);
+    __ movl(addr_high, Immediate(high_v));
+  }
+}
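The split above relies on movq accepting only a sign-extended 32-bit immediate: a value that fits is stored with one quadword move, otherwise two doubleword stores are emitted, and the implicit null check is recorded on the first store of either path. The decomposition, assuming the usual Low32Bits/High32Bits semantics:

    #include <cstdint>
    // Sketch of the two-store decomposition of a 64-bit constant.
    static inline int32_t Low32Bits(int64_t v) { return static_cast<int32_t>(v); }
    static inline int32_t High32Bits(int64_t v) { return static_cast<int32_t>(v >> 32); }
    // Example: for v = 0x123456789ABCDEF0,
    //   Low32Bits(v)  == static_cast<int32_t>(0x9ABCDEF0)
    //   High32Bits(v) == 0x12345678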
+
 #undef __
 
 }  // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index fc485f5..145b1f3 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -217,14 +217,12 @@
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
   void GenerateTestAndBranch(HInstruction* instruction,
+                             size_t condition_input_index,
                              Label* true_target,
-                             Label* false_target,
-                             Label* always_true_target);
-  void GenerateCompareTestAndBranch(HIf* if_inst,
-                                    HCondition* condition,
+                             Label* false_target);
+  void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
-                                    Label* false_target,
-                                    Label* always_true_target);
+                                    Label* false_target);
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
@@ -352,6 +350,51 @@
     return isa_features_;
   }
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e., when it is different from
+  // Location::NoLocation()), the offset value passed to
+  // artReadBarrierSlow is adjusted to take `index` into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
   int ConstantAreaStart() const {
     return constant_area_start_;
   }
@@ -368,6 +411,12 @@
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
 
+  // Assign a 64 bit constant to an address.
+  void MoveInt64ToAddress(const Address& addr_low,
+                          const Address& addr_high,
+                          int64_t v,
+                          HInstruction* instruction);
+
  private:
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
@@ -395,7 +444,7 @@
   ArenaDeque<MethodPatchInfo<Label>> method_patches_;
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
 
   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
   // We will fix this up in the linker later to have the right value.
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index e1a8c9c..af8b8b5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 
+#include "code_generator.h"
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -255,6 +256,67 @@
   return true;
 }
 
+static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_SHIFT;
+  }
+}
+
+static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_EXTEND;
+  }
+}
+
+static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+  if (instruction->IsTypeConversion()) {
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    Primitive::Type result_type = conversion->GetResultType();
+    Primitive::Type input_type = conversion->GetInputType();
+    // We don't expect to see the same type as input and result.
+    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+        (result_type != input_type);
+  } else {
+    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+  }
+}
+
+static inline bool HasShifterOperand(HInstruction* instr) {
+  // `neg` instructions are an alias of `sub` using the zero register as the
+  // first register input.
+  bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
+      instr->IsOr() || instr->IsSub() || instr->IsXor();
+  return res;
+}
+
+static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+  DCHECK(HasShifterOperand(instruction));
+  // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
+  // does *not* support extension. This is because the `extended register` form
+  // of the `sub` instruction interprets the left register with code 31 as the
+  // stack pointer and not the zero register. (So does the `immediate` form.) In
+  // the other form, `shifted register`, the register with code 31 is interpreted
+  // as the zero register.
+  return instruction->IsAdd() || instruction->IsSub();
+}
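These helpers recognize HIR shapes that can fold into an AArch64 shifter or extend operand. Illustrative instruction shapes (assumed examples, not taken from this patch):

    // HAdd(a, HShl(b, 3))                       ->  add w0, w1, w2, LSL #3
    // HSub(a, HTypeConversion(short -> int, b)) ->  sub w0, w1, w2, SXTH
    // HNeg(HShl(b, 2))                          ->  neg w0, w2, LSL #2
    // Per ShifterOperandSupportsExtension, HNeg folds shifts but not extends:
    // the extended-register form of sub reads register 31 as SP, not XZR.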
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
new file mode 100644
index 0000000..b459d24
--- /dev/null
+++ b/compiler/optimizing/common_dominator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Helper class for finding common dominators of two or more blocks in a graph.
+// The domination information of a graph must not be modified while there is
+// a CommonDominator object, as its internal state could become invalid.
+class CommonDominator {
+ public:
+  // Convenience function to find the common dominator of 2 blocks.
+  static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) {
+    CommonDominator finder(block1);
+    finder.Update(block2);
+    return finder.Get();
+  }
+
+  // Create a finder starting with a given block.
+  explicit CommonDominator(HBasicBlock* block)
+      : dominator_(block), chain_length_(ChainLength(block)) {
+    DCHECK(block != nullptr);
+  }
+
+  // Update the common dominator with another block.
+  void Update(HBasicBlock* block) {
+    DCHECK(block != nullptr);
+    HBasicBlock* block2 = dominator_;
+    DCHECK(block2 != nullptr);
+    if (block == block2) {
+      return;
+    }
+    size_t chain_length = ChainLength(block);
+    size_t chain_length2 = chain_length_;
+    // Equalize the chain lengths.
+    for ( ; chain_length > chain_length2; --chain_length) {
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+    }
+    for ( ; chain_length2 > chain_length; --chain_length2) {
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    // Now run up the chain until we hit the common dominator.
+    while (block != block2) {
+      --chain_length;
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    dominator_ = block;
+    chain_length_ = chain_length;
+  }
+
+  HBasicBlock* Get() const {
+    return dominator_;
+  }
+
+ private:
+  static size_t ChainLength(HBasicBlock* block) {
+    size_t result = 0;
+    while (block != nullptr) {
+      ++result;
+      block = block->GetDominator();
+    }
+    return result;
+  }
+
+  HBasicBlock* dominator_;
+  size_t chain_length_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
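A short usage sketch of the class above (variable names illustrative; dominance information must be up to date):

    // Pairwise:
    HBasicBlock* dom = CommonDominator::ForPair(block_a, block_b);
    // Or incrementally over an arbitrary non-empty set of blocks:
    CommonDominator finder(blocks[0]);
    for (size_t i = 1; i != blocks.size(); ++i) {
      finder.Update(blocks[i]);
    }
    HBasicBlock* common = finder.Get();

Caching chain_length_ lets each Update() walk only the depth difference before the two cursors advance in lockstep toward the common dominator.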
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9754043..02e5dab 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -123,20 +123,21 @@
   }
 
   // If we removed at least one block, we need to recompute the full
-  // dominator tree.
+  // dominator tree and try block membership.
   if (removed_one_or_more_blocks) {
     graph_->ClearDominanceInformation();
     graph_->ComputeDominanceInformation();
+    graph_->ComputeTryBlockInformation();
   }
 
   // Connect successive blocks created by dead branches. Order does not matter.
   for (HReversePostOrderIterator it(*graph_); !it.Done();) {
     HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || block->GetSuccessors().size() != 1u) {
+    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
       it.Advance();
       continue;
     }
-    HBasicBlock* successor = block->GetSuccessors()[0];
+    HBasicBlock* successor = block->GetSingleSuccessor();
     if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
       it.Advance();
       continue;
@@ -176,10 +177,7 @@
 }
 
 void HDeadCodeElimination::Run() {
-  if (!graph_->HasTryCatch()) {
-    // TODO: Update dead block elimination and enable for try/catch.
-    RemoveDeadBlocks();
-  }
+  RemoveDeadBlocks();
   SsaRedundantPhiElimination(graph_).Run();
   RemoveDeadInstructions();
 }
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block; it will be moved to a
+    // better position later, in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
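GetOrCreateDexCacheArrayBase uses the lower_bound + hinted-insert idiom to avoid a second tree search when the key is absent. The same pattern over std::map, as a self-contained sketch:

    #include <map>
    // Sketch of the lower_bound + hinted-insert idiom used above.
    template <typename K, typename V, typename MakeValue>
    V& GetOrCreate(std::map<K, V>& m, const K& key, MakeValue make_value) {
      auto lb = m.lower_bound(key);
      if (lb != m.end() && !m.key_comp()(key, lb->first)) {
        return lb->second;  // key already present
      }
      // lb is the correct insertion hint, so this insert is amortized O(1).
      return m.emplace_hint(lb, key, make_value())->second;
    }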
diff --git a/compiler/optimizing/constant_area_fixups_x86.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
similarity index 64%
copy from compiler/optimizing/constant_area_fixups_x86.h
copy to compiler/optimizing/dex_cache_array_fixups_arm.h
index 4138039..015f910 100644
--- a/compiler/optimizing/constant_area_fixups_x86.h
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -14,24 +14,24 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
-#define ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
 
 #include "nodes.h"
 #include "optimization.h"
 
 namespace art {
-namespace x86 {
+namespace arm {
 
-class ConstantAreaFixups : public HOptimization {
+class DexCacheArrayFixups : public HOptimization {
  public:
-  ConstantAreaFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "constant_area_fixups_x86", stats) {}
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
 
   void Run() OVERRIDE;
 };
 
-}  // namespace x86
+}  // namespace arm
 }  // namespace art
 
-#endif  // ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3de96b5..c16b872 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -163,12 +163,12 @@
 }
 
 void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
-  // Ensure that all exception handlers are catch blocks and that handlers
-  // are not listed multiple times.
+  ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
+
+  // Ensure that all exception handlers are catch blocks.
   // Note that a normal-flow successor may be a catch block before CFG
   // simplification. We only test normal-flow successors in SsaChecker.
-  for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) {
-    HBasicBlock* handler = it.Current();
+  for (HBasicBlock* handler : handlers) {
     if (!handler->IsCatchBlock()) {
       AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
                             "is not a catch block.",
@@ -177,9 +177,13 @@
                             try_boundary->GetId(),
                             handler->GetBlockId()));
     }
-    if (current_block_->HasSuccessor(handler, it.CurrentSuccessorIndex() + 1)) {
-      AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
-                            handler->GetBlockId(),
+  }
+
+  // Ensure that handlers are not listed multiple times.
+  for (size_t i = 0, e = handlers.size(); i < e; ++i) {
+    if (ContainsElement(handlers, handlers[i], i + 1)) {
+      AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
+                            handlers[i]->GetBlockId(),
                             try_boundary->DebugName(),
                             try_boundary->GetId()));
     }
@@ -188,6 +192,21 @@
   VisitInstruction(try_boundary);
 }
 
+void GraphChecker::VisitLoadException(HLoadException* load) {
+  // Ensure that LoadException is the first instruction in a catch block.
+  if (!load->GetBlock()->IsCatchBlock()) {
+    AddError(StringPrintf("%s:%d is in a non-catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  } else if (load->GetBlock()->GetFirstInstruction() != load) {
+    AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  }
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -242,10 +261,11 @@
     }
     size_t use_index = use_it.Current()->GetIndex();
     if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
-      AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+      AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
                             use->DebugName(),
                             use->GetId(),
+                            instruction->DebugName(),
                             instruction->GetId()));
     }
   }
@@ -355,17 +375,14 @@
 
   // Ensure that catch blocks are not normal successors, and normal blocks are
   // never exceptional successors.
-  const size_t num_normal_successors = block->NumberOfNormalSuccessors();
-  for (size_t j = 0; j < num_normal_successors; ++j) {
-    HBasicBlock* successor = block->GetSuccessors()[j];
+  for (HBasicBlock* successor : block->GetNormalSuccessors()) {
     if (successor->IsCatchBlock()) {
       AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
                             successor->GetBlockId(),
                             block->GetBlockId()));
     }
   }
-  for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
-    HBasicBlock* successor = block->GetSuccessors()[j];
+  for (HBasicBlock* successor : block->GetExceptionalSuccessors()) {
     if (!successor->IsCatchBlock()) {
       AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
                             successor->GetBlockId(),
@@ -377,10 +394,14 @@
   // block with multiple successors to a block with multiple
   // predecessors). Exceptional edges are synthesized and hence
   // not accounted for.
-  if (block->NumberOfNormalSuccessors() > 1) {
-    for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
-      HBasicBlock* successor = block->GetSuccessors()[j];
-      if (successor->GetPredecessors().size() > 1) {
+  if (block->GetSuccessors().size() > 1) {
+    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+      if (successor->IsExitBlock() &&
+          block->IsSingleTryBoundary() &&
+          block->GetPredecessors().size() == 1u &&
+          block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
+        // Allowed critical edge Throw->TryBoundary->Exit.
+      } else if (successor->GetPredecessors().size() > 1) {
         AddError(StringPrintf("Critical edge between blocks %d and %d.",
                               block->GetBlockId(),
                               successor->GetBlockId()));
@@ -445,12 +466,18 @@
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
-  // Ensure the pre-header block is first in the list of
-  // predecessors of a loop header.
+  // Ensure the pre-header block is first in the list of predecessors of a loop
+  // header and that the header block is its only successor.
   if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
     AddError(StringPrintf(
         "Loop pre-header is not the first predecessor of the loop header %d.",
         id));
+  } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
+    AddError(StringPrintf(
+        "Loop pre-header %d of loop defined by header %d has %zu successors.",
+        loop_information->GetPreHeader()->GetBlockId(),
+        id,
+        loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
   // Ensure the loop header has only one incoming branch and the remaining
@@ -493,6 +520,13 @@
             "Loop defined by header %d has an invalid back edge %d.",
             id,
             back_edge_id));
+      } else if (back_edge->GetLoopInformation() != loop_information) {
+        AddError(StringPrintf(
+            "Back edge %d of loop defined by header %d belongs to nested loop "
+            "with header %d.",
+            back_edge_id,
+            id,
+            back_edge->GetLoopInformation()->GetHeader()->GetBlockId()));
       }
     }
   }
@@ -531,10 +565,14 @@
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
     if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
-      AddError(StringPrintf("Instruction %d in block %d does not dominate "
-                            "use %d in block %d.",
-                            instruction->GetId(), current_block_->GetBlockId(),
-                            use->GetId(), use->GetBlock()->GetBlockId()));
+      AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
+                            "use %s:%d in block %d.",
+                            instruction->DebugName(),
+                            instruction->GetId(),
+                            current_block_->GetBlockId(),
+                            use->DebugName(),
+                            use->GetId(),
+                            use->GetBlock()->GetBlockId()));
     }
   }
 
@@ -697,26 +735,31 @@
     }
   }
 
-  // Test phi equivalents. There should not be two of the same type and they
-  // should only be created for constants which were untyped in DEX.
-  for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-    HPhi* other_phi = phi_it.Current()->AsPhi();
-    if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
-      if (phi->GetType() == other_phi->GetType()) {
-        std::stringstream type_str;
-        type_str << phi->GetType();
-        AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
-                              phi->GetId(),
-                              phi->GetRegNumber(),
-                              type_str.str().c_str()));
-      } else {
-        ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
-        if (!IsConstantEquivalent(phi, other_phi, &visited)) {
-          AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
-                                "are not equivalents of constants.",
+  // Test phi equivalents. There should not be two of the same type and they should only be
+  // created for constants which were untyped in DEX. Note that this test can be skipped for
+  // a synthetic phi (indicated by lack of a virtual register).
+  if (phi->GetRegNumber() != kNoRegNumber) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
+      HPhi* other_phi = phi_it.Current()->AsPhi();
+      if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+        if (phi->GetType() == other_phi->GetType()) {
+          std::stringstream type_str;
+          type_str << phi->GetType();
+          AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
                                 phi->GetId(),
-                                other_phi->GetId(),
-                                phi->GetRegNumber()));
+                                phi->GetRegNumber(),
+                                type_str.str().c_str()));
+        } else {
+          ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
+          if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+            AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+                                  "are not equivalents of constants.",
+                                  phi->GetId(),
+                                  other_phi->GetId(),
+                                  phi->GetRegNumber()));
+          }
         }
       }
     }
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index abf3659..d5ddbab 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -50,6 +50,9 @@
   // Check successors of blocks ending in TryBoundary.
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
 
+  // Check that LoadException is the first instruction in a catch block.
+  void VisitLoadException(HLoadException* load) OVERRIDE;
+
   // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4111671..e9fdb84 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -24,6 +24,7 @@
 #include "code_generator.h"
 #include "dead_code_elimination.h"
 #include "disassembler.h"
+#include "inliner.h"
 #include "licm.h"
 #include "nodes.h"
 #include "optimization.h"
@@ -252,8 +253,7 @@
   void PrintSuccessors(HBasicBlock* block) {
     AddIndent();
     output_ << "successors";
-    for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
-      HBasicBlock* successor = block->GetSuccessors()[i];
+    for (HBasicBlock* successor : block->GetNormalSuccessors()) {
       output_ << " \"B" << successor->GetBlockId() << "\" ";
     }
     output_<< std::endl;
@@ -262,8 +262,7 @@
   void PrintExceptionHandlers(HBasicBlock* block) {
     AddIndent();
     output_ << "xhandlers";
-    for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
-      HBasicBlock* handler = block->GetSuccessors()[i];
+    for (HBasicBlock* handler : block->GetExceptionalSuccessors()) {
       output_ << " \"B" << handler->GetBlockId() << "\" ";
     }
     if (block->IsExitBlock() &&
@@ -394,9 +393,15 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+    if (invoke->IsStatic()) {
+      StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
+    }
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
   }
 
@@ -420,13 +425,21 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
-  bool IsPass(const char* name) {
-    return strcmp(pass_name_, name) == 0;
+#ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
+    if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+      StartAttributeStream("shift") << instruction->GetShiftAmount();
+    }
   }
 
-  bool IsReferenceTypePropagationPass() {
-    return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName)
-        != nullptr;
+  void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+#endif
+
+  bool IsPass(const char* name) {
+    return strcmp(pass_name_, name) == 0;
   }
 
   void PrintInstruction(HInstruction* instruction) {
@@ -492,7 +505,8 @@
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
       }
-    } else if (IsReferenceTypePropagationPass()
+    } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
+        || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
         ? instruction->AsLoadClass()->GetLoadedClassRTI()
@@ -505,6 +519,18 @@
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
       } else if (instruction->IsLoadClass()) {
         StartAttributeStream("klass") << "unresolved";
+      } else if (instruction->IsNullConstant()) {
+        // The NullConstant may be added to the graph during other passes that happen between
+        // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+        // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+        // So we should check NullConstants for validity only after reference type propagation.
+        //
+        // Note: The infrastructure to properly type NullConstants everywhere is too complex to
+        // add for the benefit it would bring.
+        StartAttributeStream("klass") << "not_set";
+        DCHECK(!is_after_pass_
+            || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
+            << " Expected a valid rti after reference type propagation";
       } else {
         DCHECK(!is_after_pass_)
             << "Expected a valid rti after reference type propagation";
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c36de84..4af111b 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -377,9 +377,10 @@
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
-    set->Kill(current->GetSideEffects());
     // Save the next instruction in case `current` is removed from the graph.
     HInstruction* next = current->GetNext();
+    // Do not kill the set with the side effects of the instruction just yet: if
+    // the instruction is GVN'ed (replaced and removed), its side effects never
+    // take place, so there is nothing to kill.
     if (current->CanBeMoved()) {
       if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
         // For commutative ops, (x op y) will be treated the same as (y op x)
@@ -395,8 +396,11 @@
         current->ReplaceWith(existing);
         current->GetBlock()->RemoveInstruction(current);
       } else {
+        set->Kill(current->GetSideEffects());
         set->Add(current);
       }
+    } else {
+      set->Kill(current->GetSideEffects());
     }
     current = next;
   }
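The reordering above matters for precision: killing before the lookup would discard values that remain valid whenever `current` ends up replaced, since a GVN'ed instruction is removed and its side effects never occur. A sketch of the resulting control flow, with simplified, hypothetical types rather than ART's ValueSet/HInstruction:

    // Deferred kill in a local value-numbering walk (illustrative types).
    void VisitBlock(Insn* first, ValueSet* set) {
      for (Insn* current = first; current != nullptr; ) {
        Insn* next = current->next;  // current may be removed below
        Insn* existing = current->movable ? set->Lookup(current) : nullptr;
        if (existing != nullptr) {
          ReplaceAndRemove(current, existing);  // replaced: nothing to kill
        } else {
          set->Kill(current->side_effects);     // kill only if it survives
          if (current->movable) {
            set->Add(current);
          }
        }
        current = next;
      }
    }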
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index b7262f6..5de94f4 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -69,10 +69,13 @@
     entry_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(entry_);
     BuildForLoop(0, n);
+    return_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(exit_);
     entry_->AddSuccessor(loop_preheader_[0]);
-    loop_header_[0]->AddSuccessor(exit_);
+    loop_header_[0]->AddSuccessor(return_);
+    return_->AddSuccessor(exit_);
     graph_->SetEntryBlock(entry_);
     graph_->SetExitBlock(exit_);
 
@@ -91,6 +94,7 @@
     entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_));
     dum_ = new (&allocator_) HLocal(n + 2);
     entry_->AddInstruction(dum_);
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
 
     // Provide loop instructions.
@@ -177,6 +181,7 @@
 
   // Fixed basic blocks and instructions.
   HBasicBlock* entry_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
   HInstruction* parameter_;  // "this"
   HInstruction* constant0_;
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5530d26..2ac1e15 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -75,10 +75,12 @@
   return v;
 }
 
-static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) {
-  DCHECK(preheader != nullptr);
+/** Helper method to insert an instruction. */
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+  DCHECK(block != nullptr);
+  DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
   DCHECK(instruction != nullptr);
-  preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction());
+  block->InsertInstructionBefore(instruction, block->GetLastInstruction());
   return instruction;
 }
 
@@ -91,48 +93,98 @@
   DCHECK(induction_analysis != nullptr);
 }
 
-InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return GetInduction(context, instruction, /* is_min */ true);
-}
-
-InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
+void InductionVarRange::GetInductionRange(HInstruction* context,
+                                          HInstruction* instruction,
+                                          /*out*/Value* min_val,
+                                          /*out*/Value* max_val,
+                                          /*out*/bool* needs_finite_test) {
+  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (loop != nullptr) {
+    // Set up loop information.
+    HBasicBlock* header = loop->GetHeader();
+    bool in_body = context->GetBlock() != header;
+    HInductionVarAnalysis::InductionInfo* info =
+        induction_analysis_->LookupInfo(loop, instruction);
+    HInductionVarAnalysis::InductionInfo* trip =
+        induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
+    // Find range.
+    *min_val = GetVal(info, trip, in_body, /* is_min */ true);
+    *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+    *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+  } else {
+    // No loop to analyze.
+    *min_val = Value();
+    *max_val = Value();
+    *needs_finite_test = false;
+  }
 }
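A hedged sketch of how a client pass (e.g. bounds check elimination) would consume the new out-parameters; variable names are illustrative:

    InductionVarRange::Value min_val;
    InductionVarRange::Value max_val;
    bool needs_finite_test = false;
    range.GetInductionRange(context, index_insn, &min_val, &max_val, &needs_finite_test);
    if (!needs_finite_test) {
      // min_val/max_val bound the induction value at `context` unconditionally.
    } else {
      // The bounds only hold if the loop is proven finite; a finiteness test
      // must be established before relying on them.
    }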
 
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
-                                        /*out*/bool* top_test) {
-  return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test);
+                                        /*out*/bool* needs_finite_test,
+                                        /*out*/bool* needs_taken_test) {
+  return GenerateCode(context,
+                      instruction,
+                      nullptr, nullptr, nullptr, nullptr, nullptr,  // nothing generated yet
+                      needs_finite_test,
+                      needs_taken_test);
 }
 
-bool InductionVarRange::GenerateCode(HInstruction* context,
-                                     HInstruction* instruction,
-                                     HGraph* graph,
-                                     HBasicBlock* block,
-                                     /*out*/HInstruction** lower,
-                                     /*out*/HInstruction** upper) {
-  return GenerateCode(context, instruction, graph, block, lower, upper, nullptr);
+void InductionVarRange::GenerateRangeCode(HInstruction* context,
+                                          HInstruction* instruction,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** lower,
+                                          /*out*/HInstruction** upper) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
+}
+
+void InductionVarRange::GenerateTakenTest(HInstruction* context,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** taken_test) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
 }
 
 //
 // Private class methods.
 //
 
-InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context,
-                                                         HInstruction* instruction,
-                                                         bool is_min) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop != nullptr) {
-    HBasicBlock* header = loop->GetHeader();
-    bool in_body = context->GetBlock() != header;
-    return GetVal(induction_analysis_->LookupInfo(loop, instruction),
-                  induction_analysis_->LookupInfo(loop, header->GetLastInstruction()),
-                  in_body,
-                  is_min);
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kLinear) {
+      return true;
+    } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) {
+      return NeedsTripCount(info->op_b);
+    }
   }
-  return Value();
+  return false;
+}
+
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
+             trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
+             trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe;
+    }
+  }
+  return false;
 }
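
// NOTE (editor): To make the roles of these three predicates concrete, here is a
// minimal standalone sketch of the same classification logic, using simplified
// stand-in types (`InductionClass`, `Op`, and `Info` are illustrative, not the
// ART `HInductionVarAnalysis` types):

```cpp
#include <cassert>

enum class InductionClass { kInvariant, kLinear, kWrapAround, kPeriodic };
enum class Op { kNone, kTripCountInLoop, kTripCountInLoopUnsafe,
                kTripCountInBody, kTripCountInBodyUnsafe };

struct Info {
  InductionClass induction_class;
  Op operation;
  const Info* op_b;  // nested info, e.g. what a wrap-around wraps over
};

// A linear induction needs the trip count to bound its final value; a
// wrap-around defers to the induction it wraps over.
bool NeedsTripCount(const Info* info) {
  if (info == nullptr) return false;
  if (info->induction_class == InductionClass::kLinear) return true;
  if (info->induction_class == InductionClass::kWrapAround) return NeedsTripCount(info->op_b);
  return false;
}

bool IsBodyTripCount(const Info* trip) {
  return trip != nullptr && trip->induction_class == InductionClass::kInvariant &&
         (trip->operation == Op::kTripCountInBody ||
          trip->operation == Op::kTripCountInBodyUnsafe);
}

bool IsUnsafeTripCount(const Info* trip) {
  return trip != nullptr && trip->induction_class == InductionClass::kInvariant &&
         (trip->operation == Op::kTripCountInBodyUnsafe ||
          trip->operation == Op::kTripCountInLoopUnsafe);
}

int main() {
  Info linear{InductionClass::kLinear, Op::kNone, nullptr};
  Info wrap{InductionClass::kWrapAround, Op::kNone, &linear};
  assert(NeedsTripCount(&wrap));  // wrap-around over a linear induction
  Info trip{InductionClass::kInvariant, Op::kTripCountInBodyUnsafe, nullptr};
  assert(IsBodyTripCount(&trip) && IsUnsafeTripCount(&trip));
}
```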
 
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
@@ -184,11 +236,13 @@
           case HInductionVarAnalysis::kFetch:
             return GetFetch(info->fetch, trip, in_body, is_min);
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
               return GetVal(info->op_a, trip, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               return Value(0);
             } else if (in_body) {
@@ -356,25 +410,46 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** lower,
                                      /*out*/HInstruction** upper,
-                                     /*out*/bool* top_test) {
+                                     /*out*/HInstruction** taken_test,
+                                     /*out*/bool* needs_finite_test,
+                                     /*out*/bool* needs_taken_test) {
   HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
   if (loop != nullptr) {
+    // Set up loop information.
     HBasicBlock* header = loop->GetHeader();
     bool in_body = context->GetBlock() != header;
-    HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
+    HInductionVarAnalysis::InductionInfo* info =
+        induction_analysis_->LookupInfo(loop, instruction);
+    if (info == nullptr) {
+      return false;  // nothing to analyze
+    }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    if (info != nullptr && trip != nullptr) {
-      if (top_test != nullptr) {
-        *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop;
+    // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+    // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
+    // code does not use the trip-count explicitly (since there could be an implicit relation
+    // between e.g. an invariant subscript and a not-taken condition).
+    *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+    *needs_taken_test = IsBodyTripCount(trip);
+    // Code generation for the taken test: generate the code when requested, or otherwise
+    // analyze whether code generation is feasible when a taken test is needed.
+    if (taken_test != nullptr) {
+      return GenerateCode(
+          trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false);
+    } else if (*needs_taken_test) {
+      if (!GenerateCode(
+          trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) {
+        return false;
       }
-      return
+    }
+    // Code generation for lower and upper.
+    return
         // Success on lower if invariant (not set), or code can be generated.
         ((info->induction_class == HInductionVarAnalysis::kInvariant) ||
             GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
         // And success on upper.
         GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
-    }
   }
   return false;
 }
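
// NOTE (editor): The in-loop/in-body distinction handled above can be summarized
// with a small sketch (constant trip count and unit stride assumed; `IndexRange`
// is an illustrative helper, not ART code): the header condition is evaluated one
// extra time, so the index reaches the trip count there, while the body only ever
// sees values up to one less.

```cpp
#include <utility>

// Range of the normalized induction i in a loop 0 <= i < tc, depending on where
// it is observed (mirrors the kTripCountInLoop vs. kTripCountInBody cases).
std::pair<int, int> IndexRange(int tc, bool in_body) {
  return in_body ? std::make_pair(0, tc - 1)  // body sees 0 .. tc-1
                 : std::make_pair(0, tc);     // header test runs one extra time
}
```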
@@ -387,19 +462,38 @@
                                      bool in_body,
                                      bool is_min) {
   if (info != nullptr) {
+    // Handle current operation.
     Primitive::Type type = Primitive::kPrimInt;
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
-    int32_t value = 0;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
         // Invariants.
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
+          case HInductionVarAnalysis::kLT:
+          case HInductionVarAnalysis::kLE:
+          case HInductionVarAnalysis::kGT:
+          case HInductionVarAnalysis::kGE:
             if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
                 GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
               if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+                HInstruction* operation = nullptr;
+                switch (info->operation) {
+                  case HInductionVarAnalysis::kAdd:
+                    operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kLT:
+                    operation = new (graph->GetArena()) HLessThan(opa, opb); break;
+                  case HInductionVarAnalysis::kLE:
+                    operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break;
+                  case HInductionVarAnalysis::kGT:
+                    operation = new (graph->GetArena()) HGreaterThan(opa, opb); break;
+                  case HInductionVarAnalysis::kGE:
+                    operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break;
+                  default:
+                    LOG(FATAL) << "unknown operation";
+                }
+                *result = Insert(block, operation);
               }
               return true;
             }
@@ -422,16 +516,21 @@
             }
             break;
           case HInductionVarAnalysis::kFetch:
-            if (graph != nullptr) {
-              *result = info->fetch;  // already in HIR
+            if (info->fetch->GetType() == type) {
+              if (graph != nullptr) {
+                *result = info->fetch;  // already in HIR
+              }
+              return true;
             }
-            return true;
+            break;
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
               return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               if (graph != nullptr) {
                 *result = graph->GetIntConstant(0);
@@ -452,23 +551,44 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear:
+      case HInductionVarAnalysis::kLinear: {
         // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
         // to avoid arithmetic wrap-around situations that are hard to guard against.
-        if (GetConstant(info->op_a, &value)) {
-          if (value == 1 || value == -1) {
-            const bool is_min_a = value == 1 ? is_min : !is_min;
+        int32_t stride_value = 0;
+        if (GetConstant(info->op_a, &stride_value)) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
             if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
                 GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
               if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
+                }
+                *result = Insert(block, oper);
               }
               return true;
             }
           }
         }
         break;
-      default:  // TODO(ajcbik): add more cases
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (extreme.is_known && extreme.a_constant == 0) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
+      default:
         break;
     }
   }
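
// NOTE (editor): For the unit-stride linear case above, the generated HAdd/HSub
// pair corresponds to simple bound arithmetic; a sketch with constants
// (`LinearBound` is illustrative only):

```cpp
// Bound of a linear induction stride * i + b over a normalized trip count bound,
// restricted to stride +1/-1 exactly as in the kLinear case above.
int LinearBound(int stride_value, int trip_bound, int b) {
  return (stride_value == 1) ? trip_bound + b   // HAdd(opa, opb)
                             : b - trip_bound;  // HSub(opb, opa)
}
// Example: a down-counting loop i = b, b-1, ... with trip count 100 reaches a
// minimum of LinearBound(-1, 99, b) == b - 99 inside the body.
```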
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7fa5a26..7984871 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,29 +57,33 @@
   explicit InductionVarRange(HInductionVarAnalysis* induction);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, lower bound on the instruction's value.
+   * Given a context denoted by the first instruction, returns possibly conservative
+   * lower and upper bounds on the instruction's value in the output parameters min_val
+   * and max_val, respectively. The needs_finite_test flag denotes whether an additional
+   * finite-test is needed to protect the range evaluation inside its loop.
    */
-  Value GetMinInduction(HInstruction* context, HInstruction* instruction);
+  void GetInductionRange(HInstruction* context,
+                         HInstruction* instruction,
+                         /*out*/Value* min_val,
+                         /*out*/Value* max_val,
+                         /*out*/bool* needs_finite_test);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, upper bound on the instruction's value.
+   * Returns true if range analysis is able to generate code for the lower and upper
+   * bound expressions on the instruction in the given context. The needs_finite_test
+   * and needs_taken_test flags denote whether an additional finite-test and/or taken-test
+   * are needed to protect the range evaluation inside its loop.
    */
-  Value GetMaxInduction(HInstruction* context, HInstruction* instruction);
-
-  /**
-   * Returns true if range analysis is able to generate code for the lower and upper bound
-   * expressions on the instruction in the given context. Output parameter top_test denotes
-   * whether a top test is needed to protect the trip-count expression evaluation.
-   */
-  bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test);
+  bool CanGenerateCode(HInstruction* context,
+                       HInstruction* instruction,
+                       /*out*/bool* needs_finite_test,
+                       /*out*/bool* needs_taken_test);
 
   /**
    * Generates the actual code in the HIR for the lower and upper bound expressions on the
   * instruction in the given context. Code for the lower and upper bound expressions is
-   * generated in given block and graph and are returned in lower and upper, respectively.
-   * For a loop invariant, lower is not set.
+   * generated in the given block and graph and is returned in the output parameters
+   * lower and upper, respectively. For a loop invariant, lower is not set.
    *
    * For example, given expression x+i with range [0, 5] for i, calling this method
    * will generate the following sequence:
@@ -87,20 +91,35 @@
    * block:
    *   lower: add x, 0
    *   upper: add x, 5
+   *
+   * Precondition: CanGenerateCode() returns true.
    */
-  bool GenerateCode(HInstruction* context,
-                    HInstruction* instruction,
-                    HGraph* graph,
-                    HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper);
+  void GenerateRangeCode(HInstruction* context,
+                         HInstruction* instruction,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/HInstruction** lower,
+                         /*out*/HInstruction** upper);
+
+  /**
+   * Generates an explicit taken-test for the loop in the given context. Code is generated
+   * in the given block and graph. The taken-test is returned in the output parameter taken_test.
+   *
+   * Precondition: CanGenerateCode() returns true and needs_taken_test is set.
+   */
+  void GenerateTakenTest(HInstruction* context,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/HInstruction** taken_test);
 
  private:
   //
   // Private helper methods.
   //
 
-  Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min);
+  static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info);
+  static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip);
+  static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip);
 
   static Value GetFetch(HInstruction* instruction,
                         HInductionVarAnalysis::InductionInfo* trip,
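
// NOTE (editor): A hypothetical caller of the revised interface, e.g. a
// bounds-check elimination pass, would chain the three entry points as sketched
// below (`range`, `ctx`, `index`, `graph`, and `preheader` are placeholders, not
// ART code):

```cpp
bool needs_finite_test = false;
bool needs_taken_test = false;
if (range.CanGenerateCode(ctx, index, &needs_finite_test, &needs_taken_test)) {
  HInstruction* lower = nullptr;
  HInstruction* upper = nullptr;
  range.GenerateRangeCode(ctx, index, graph, preheader, &lower, &upper);
  if (needs_taken_test) {
    HInstruction* taken = nullptr;
    range.GenerateTakenTest(ctx, graph, preheader, &taken);
    // Guard any use of lower/upper with `taken`, e.g. by deoptimizing when the
    // loop would not be taken at all.
  }
  // If needs_finite_test is set, the client must additionally protect against a
  // potentially non-terminating loop before trusting the range.
}
```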
@@ -130,8 +149,8 @@
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /**
-   * Generates code for lower/upper expression in the HIR. Returns true on success.
-   * With graph == nullptr, the method can be used to determine if code generation
+   * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
+   * With nullptr output values, the method can be used to determine whether code generation
    * would be successful without generating actual code yet.
    */
   bool GenerateCode(HInstruction* context,
@@ -140,7 +159,9 @@
                     HBasicBlock* block,
                     /*out*/HInstruction** lower,
                     /*out*/HInstruction** upper,
-                    bool* top_test);
+                    /*out*/HInstruction** taken_test,
+                    /*out*/bool* needs_finite_test,
+                    /*out*/bool* needs_taken_test);
 
   static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                            HInductionVarAnalysis::InductionInfo* trip,
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index ce8926a..c2ba157 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -46,6 +46,10 @@
     EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
+  //
+  // Construction methods.
+  //
+
   /** Constructs bare minimum graph. */
   void BuildGraph() {
     graph_->SetNumberOfVRegs(1);
@@ -58,7 +62,7 @@
   }
 
   /** Constructs loop with given upper bound. */
-  void BuildLoop(HInstruction* upper) {
+  void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) {
     // Control flow.
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_preheader_);
@@ -66,29 +70,37 @@
     graph_->AddBlock(loop_header);
     HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_body);
+    HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_block);
     entry_block_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header);
     loop_header->AddSuccessor(loop_body);
-    loop_header->AddSuccessor(exit_block_);
+    loop_header->AddSuccessor(return_block);
     loop_body->AddSuccessor(loop_header);
+    return_block->AddSuccessor(exit_block_);
     // Instructions.
     HLocal* induc = new (&allocator_) HLocal(0);
     entry_block_->AddInstruction(induc);
     loop_preheader_->AddInstruction(
-        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0)));  // i = 0
+        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower)));  // i = l
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
     loop_header->AddInstruction(load);
-    condition_ = new (&allocator_) HLessThan(load, upper);
+    if (stride > 0) {
+      condition_ = new (&allocator_) HLessThan(load, upper);  // i < u
+    } else {
+      condition_ = new (&allocator_) HGreaterThan(load, upper);  // i > u
+    }
     loop_header->AddInstruction(condition_);
-    loop_header->AddInstruction(new (&allocator_) HIf(condition_));  // i < u
+    loop_header->AddInstruction(new (&allocator_) HIf(condition_));
     load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
     loop_body->AddInstruction(load);
-    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1));
+    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride));
     loop_body->AddInstruction(increment_);
-    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i++
+    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i += s
     loop_body->AddInstruction(new (&allocator_) HGoto());
-    exit_block_->AddInstruction(new (&allocator_) HReturnVoid());
+    return_block->AddInstruction(new (&allocator_) HReturnVoid());
+    exit_block_->AddInstruction(new (&allocator_) HExit());
   }
 
   /** Performs induction variable analysis. */
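
// NOTE (editor): For reference, the graph constructed by BuildLoop(lower, upper,
// stride) models this source-level loop (a C++ rendering of the test fixture's
// shape, not generated code):

```cpp
void Reference(int lower, int upper, int stride) {
  for (int i = lower; (stride > 0) ? (i < upper) : (i > upper); i += stride) {
    // loop body
  }
}
```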
@@ -124,8 +136,20 @@
   }
 
   /** Constructs a trip-count. */
-  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
-    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
+    if (in_loop && safe) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+    } else if (in_loop) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr);
+    } else if (safe) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr);
+    } else {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr);
+    }
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -139,16 +163,34 @@
         HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi));
   }
 
+  /** Constructs a wrap-around induction consisting of a constant, followed by the given induction. */
+  HInductionVarAnalysis::InductionInfo* CreateWrapAround(
+      int32_t initial,
+      HInductionVarAnalysis::InductionInfo* info) {
+    return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info);
+  }
+
   /** Constructs a wrap-around induction consisting of a constant, followed by a range. */
   HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) {
-    return iva_->CreateInduction(
-        HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi));
+    return CreateWrapAround(initial, CreateRange(lo, hi));
   }
 
   //
   // Relay methods.
   //
 
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+    return InductionVarRange::NeedsTripCount(info);
+  }
+
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return InductionVarRange::IsBodyTripCount(trip);
+  }
+
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return InductionVarRange::IsUnsafeTripCount(trip);
+  }
+
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
     return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
@@ -202,6 +244,26 @@
 // Tests on static methods.
 //
 
+TEST_F(InductionVarRangeTest, TripCountProperties) {
+  EXPECT_FALSE(NeedsTripCount(nullptr));
+  EXPECT_FALSE(NeedsTripCount(CreateConst(1)));
+  EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1)));
+  EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3)));
+  EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1))));
+
+  EXPECT_FALSE(IsBodyTripCount(nullptr));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true)));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false)));
+
+  EXPECT_FALSE(IsUnsafeTripCount(nullptr));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false)));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false)));
+}
+
 TEST_F(InductionVarRangeTest, GetMinMaxNull) {
   ExpectEqual(Value(), GetMin(nullptr, nullptr));
   ExpectEqual(Value(), GetMax(nullptr, nullptr));
@@ -279,10 +341,10 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
-  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100)));
+  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) {
@@ -398,61 +460,98 @@
 // Tests on instance methods.
 //
 
-TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) {
-  BuildLoop(graph_->GetIntConstant(1000));
+TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
+  BuildLoop(0, graph_->GetIntConstant(1000), 1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
+  Value v1, v2;
+  bool needs_finite_test = true;
+
   // In context of header: known.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_));
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
 }
 
-TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
+  BuildLoop(1000, graph_->GetIntConstant(0), -1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
-  // In context of header: full range unknown.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  Value v1, v2;
+  bool needs_finite_test = true;
+
+  // In context of header: known.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_));
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
 }
 
-TEST_F(InductionVarRangeTest, CodeGeneration) {
+TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
   HInstruction* parameter = new (&allocator_) HParameterValue(
       graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+  BuildLoop(0, parameter, 1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: upper unknown.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(), v2);
+
+  // In context of loop-body: known.
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(parameter, 1, -1), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(parameter, 1, 0), v2);
+
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
-  bool top_test = false;
+  HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test));
-  ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test));
-  EXPECT_TRUE(top_test);
+  EXPECT_FALSE(range.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  EXPECT_TRUE(range.GenerateCode(
-      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper));
+  range.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
@@ -462,7 +561,7 @@
   ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
 
-  // Verify upper is (V-1)+0
+  // Verify upper is (V-1)+0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsAdd());
   ASSERT_TRUE(upper->InputAt(0)->IsSub());
@@ -471,6 +570,91 @@
   EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
   ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 0<V.
+  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsLessThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+}
+
+TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  entry_block_->AddInstruction(parameter);
+  BuildLoop(1000, parameter, -1);
+  PerformInductionVarAnalysis();
+  InductionVarRange range(iva_);
+
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: lower unknown.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(), v1);
+  ExpectEqual(Value(1000), v2);
+
+  // In context of loop-body: known.
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(parameter, 1, 1), v1);
+  ExpectEqual(Value(1000), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(parameter, 1, 0), v1);
+  ExpectEqual(Value(999), v2);
+
+  HInstruction* lower = nullptr;
+  HInstruction* upper = nullptr;
+  HInstruction* taken = nullptr;
+
+  // Can generate code in context of loop-body only.
+  EXPECT_FALSE(range.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
+
+  // Generates code.
+  range.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+
+  // Verify lower is 1000-(-(V-1000)-1).
+  ASSERT_TRUE(lower != nullptr);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(1);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(0);
+  ASSERT_TRUE(lower->IsNeg());
+  lower = lower->InputAt(0);
+  ASSERT_TRUE(lower->IsSub());
+  EXPECT_TRUE(lower->InputAt(0)->IsParameterValue());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify upper is 1000-0.
+  ASSERT_TRUE(upper != nullptr);
+  ASSERT_TRUE(upper->IsSub());
+  ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 1000>V.
+  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsGreaterThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 353881e..6d93be3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -148,7 +148,7 @@
     // the target method. Since we check above the exact type of the receiver,
     // the only reason this can happen is an IncompatibleClassChangeError.
     return nullptr;
-  } else if (resolved_method->IsAbstract()) {
+  } else if (!resolved_method->IsInvokable()) {
     // The information we had on the receiver was not enough to find
     // the target method. Since we check above the exact type of the receiver,
     // the only reason this can happen is an IncompatibleClassChangeError.
@@ -192,6 +192,10 @@
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
     mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
@@ -406,8 +410,8 @@
     &type_propagation,
     &sharpening,
     &simplify,
-    &dce,
     &fold,
+    &dce,
   };
 
   for (size_t i = 0; i < arraysize(optimizations); ++i) {
@@ -534,6 +538,7 @@
             ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
+  // Check the integrity of reference types and run another type propagation if needed.
   if ((return_replacement != nullptr)
       && (return_replacement->GetType() == Primitive::kPrimNot)) {
     if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
@@ -544,10 +549,20 @@
       DCHECK(return_replacement->IsPhi());
       size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
       ReferenceTypeInfo::TypeHandle return_handle =
-        handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
+          handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
       return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
+
+    // If the return type is a refinement of the declared type, run the type propagation again.
+    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+    if (invoke_rti.IsStrictSupertypeOf(return_rti)
+        || (return_rti.IsExact() && !invoke_rti.IsExact())
+        || !return_replacement->CanBeNull()) {
+      ReferenceTypePropagation rtp_fixup(graph_, handles_);
+      rtp_fixup.Run();
+    }
   }
 
   return true;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a..2f3df7f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -169,16 +169,6 @@
       //    src
       instruction->ReplaceWith(input_other);
       instruction->GetBlock()->RemoveInstruction(instruction);
-    } else if (instruction->IsShl() && input_cst->IsOne()) {
-      // Replace Shl looking like
-      //    SHL dst, src, 1
-      // with
-      //    ADD dst, src, src
-      HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
-                                                   input_other,
-                                                   input_other);
-      instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
-      RecordSimplification();
     }
   }
 }
@@ -372,9 +362,8 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
-        // Replace (bool_value == false) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
+        block->RemoveInstruction(equal);
         RecordSimplification();
       } else {
         // Replace (bool_value == integer_not_zero_nor_one_constant) with false
@@ -399,9 +388,8 @@
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsOne()) {
-        // Replace (bool_value != true) with !bool_value
-        block->ReplaceAndRemoveInstructionWith(
-            not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+        not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
+        block->RemoveInstruction(not_equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsZero()) {
         // Replace (bool_value != false) with bool_value
@@ -796,6 +784,34 @@
       HShl* shl = new(allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
       RecordSimplification();
+    } else if (IsPowerOfTwo(factor - 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n + 1)
+      // into
+      //    SHL tmp, src, n
+      //    ADD dst, src, tmp
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1)));
+      HAdd* add = new (allocator) HAdd(type, input_other, shl);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, add);
+      RecordSimplification();
+    } else if (IsPowerOfTwo(factor + 1)) {
+      // Transform code looking like
+      //    MUL dst, src, (2^n - 1)
+      // into
+      //    SHL tmp, src, n
+      //    SUB dst, tmp, src
+      HShl* shl = new (allocator) HShl(type,
+                                       input_other,
+                                       GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1)));
+      HSub* sub = new (allocator) HSub(type, shl, input_other);
+
+      block->InsertInstructionBefore(shl, instruction);
+      block->ReplaceAndRemoveInstructionWith(instruction, sub);
+      RecordSimplification();
     }
   }
 }
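
// NOTE (editor): The two new multiply rewrites rest on the identities
// x * (2^n + 1) == (x << n) + x and x * (2^n - 1) == (x << n) - x; a quick
// self-contained check (helper names are illustrative):

```cpp
#include <cassert>
#include <cstdint>

int32_t MulViaShiftAdd(int32_t x, int n) { return (x << n) + x; }  // x * (2^n + 1)
int32_t MulViaShiftSub(int32_t x, int n) { return (x << n) - x; }  // x * (2^n - 1)

int main() {
  assert(MulViaShiftAdd(7, 3) == 7 * 9);  // factor 9 == 2^3 + 1
  assert(MulViaShiftSub(7, 3) == 7 * 7);  // factor 7 == 2^3 - 1
}
```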
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index eb79f46..6a34b13 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,11 +16,16 @@
 
 #include "instruction_simplifier_arm64.h"
 
+#include "common_arm64.h"
 #include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
 
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+using helpers::ShifterOperandSupportsExtension;
+
 void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
                                                                      HInstruction* array,
                                                                      HInstruction* index,
@@ -62,6 +67,169 @@
   RecordSimplification();
 }
 
+bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
+                                                                   HInstruction* bitfield_op,
+                                                                   bool do_merge) {
+  DCHECK(HasShifterOperand(use));
+  DCHECK(use->IsBinaryOperation() || use->IsNeg());
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+  DCHECK(!bitfield_op->HasEnvironmentUses());
+
+  Primitive::Type type = use->GetType();
+  if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+    return false;
+  }
+
+  HInstruction* left;
+  HInstruction* right;
+  if (use->IsBinaryOperation()) {
+    left = use->InputAt(0);
+    right = use->InputAt(1);
+  } else {
+    DCHECK(use->IsNeg());
+    right = use->AsNeg()->InputAt(0);
+    left = GetGraph()->GetConstant(right->GetType(), 0);
+  }
+  DCHECK(left == bitfield_op || right == bitfield_op);
+
+  if (left == right) {
+    // TODO: Handle special transformations in this situation?
+    // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+    // Or should this be part of a separate transformation logic?
+    return false;
+  }
+
+  bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative();
+  HInstruction* other_input;
+  if (bitfield_op == right) {
+    other_input = left;
+  } else {
+    if (is_commutative) {
+      other_input = right;
+    } else {
+      return false;
+    }
+  }
+
+  HArm64DataProcWithShifterOp::OpKind op_kind;
+  int shift_amount = 0;
+  HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
+      !ShifterOperandSupportsExtension(use)) {
+    return false;
+  }
+
+  if (do_merge) {
+    HArm64DataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
+                                                                 other_input,
+                                                                 bitfield_op->InputAt(0),
+                                                                 op_kind,
+                                                                 shift_amount,
+                                                                 use->GetDexPc());
+    use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+    if (bitfield_op->GetUses().IsEmpty()) {
+      bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+    }
+    RecordSimplification();
+  }
+
+  return true;
+}
+
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+
+  if (bitfield_op->HasEnvironmentUses()) {
+    return false;
+  }
+
+  const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+  // Check whether we can merge the instruction into all of its users' shifter operands.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    if (!HasShifterOperand(use)) {
+      return false;
+    }
+    if (!CanMergeIntoShifterOperand(use, bitfield_op)) {
+      return false;
+    }
+  }
+
+  // Merge the instruction into its uses.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    bool merged = MergeIntoShifterOperand(use, bitfield_op);
+    DCHECK(merged);
+  }
+
+  return true;
+}
+
+bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
+    HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
+  DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+  DCHECK_NE(input_binop, input_other);
+  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+
+  // Try to interpret patterns like
+  //    a * (b <+/-> 1)
+  // as
+  //    (a * b) <+/-> a
+  HInstruction* input_a = input_other;
+  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
+  HInstruction::InstructionKind op_kind;
+
+  if (input_binop->IsAdd()) {
+    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+      // Interpret
+      //    a * (b + 1)
+      // as
+      //    (a * b) + a
+      input_b = input_binop->GetLeastConstantLeft();
+      op_kind = HInstruction::kAdd;
+    }
+  } else {
+    DCHECK(input_binop->IsSub());
+    if (input_binop->GetRight()->IsConstant() &&
+        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+      // Interpret
+      //    a * (b - (-1))
+      // as
+      //    a + (a * b)
+      input_b = input_binop->GetLeft();
+      op_kind = HInstruction::kAdd;
+    } else if (input_binop->GetLeft()->IsConstant() &&
+               input_binop->GetLeft()->AsConstant()->IsOne()) {
+      // Interpret
+      //    a * (1 - b)
+      // as
+      //    a - (a * b)
+      input_b = input_binop->GetRight();
+      op_kind = HInstruction::kSub;
+    }
+  }
+
+  if (input_b == nullptr) {
+    // We did not find a pattern we can optimize.
+    return false;
+  }
+
+  HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+  input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  return true;  // The multiply was replaced; report success so the caller stops.
+}
+
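
// NOTE (editor): The three patterns recognized above are plain algebraic
// identities; a quick check of each rewrite (self-contained, illustrative only):

```cpp
#include <cassert>

int main() {
  int a = 5, b = 11;
  assert(a * (b + 1) == a * b + a);     // a * (b + 1)    -> multiply-add
  assert(a * (b - (-1)) == a + a * b);  // a * (b - (-1)) -> multiply-add
  assert(a * (1 - b) == a - a * b);     // a * (1 - b)    -> multiply-subtract
}
```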
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -76,5 +244,110 @@
                                Primitive::ComponentSize(instruction->GetComponentType()));
 }
 
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+  Primitive::Type type = instruction->GetType();
+  if (!Primitive::IsIntOrLongType(type)) {
+    return;
+  }
+
+  HInstruction* use = instruction->HasNonEnvironmentUses()
+      ? instruction->GetUses().GetFirst()->GetUser()
+      : nullptr;
+
+  if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+    // Replace code looking like
+    //    MUL tmp, x, y
+    //    SUB dst, acc, tmp
+    // with
+    //    MULSUB dst, acc, x, y
+    // Note that we do not want to (unconditionally) perform the merge when the
+    // multiplication has multiple uses and it can be merged in all of them.
+    // Multiple uses could happen on the same control-flow path, and we would
+    // then increase the amount of work. In the future we could try to evaluate
+    // whether all uses are on different control-flow paths (using dominance and
+    // reverse-dominance information) and only perform the merge when they are.
+    HInstruction* accumulator = nullptr;
+    HBinaryOperation* binop = use->AsBinaryOperation();
+    HInstruction* binop_left = binop->GetLeft();
+    HInstruction* binop_right = binop->GetRight();
+    // Be careful after GVN. This should not happen since the `HMul` has only
+    // one use.
+    DCHECK_NE(binop_left, binop_right);
+    if (binop_right == instruction) {
+      accumulator = binop_left;
+    } else if (use->IsAdd()) {
+      DCHECK_EQ(binop_left, instruction);
+      accumulator = binop_right;
+    }
+
+    if (accumulator != nullptr) {
+      HArm64MultiplyAccumulate* mulacc =
+          new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+                                                                binop->GetKind(),
+                                                                accumulator,
+                                                                instruction->GetLeft(),
+                                                                instruction->GetRight());
+
+      binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+      DCHECK(!instruction->HasUses());
+      instruction->GetBlock()->RemoveInstruction(instruction);
+      RecordSimplification();
+      return;
+    }
+  }
+
+  // Use multiply accumulate instruction for a few simple patterns.
+  // We prefer not applying the following transformations if the left and
+  // right inputs perform the same operation.
+  // We rely on GVN having squashed the inputs if appropriate. However the
+  // results are still correct even if that did not happen.
+  if (instruction->GetLeft() == instruction->GetRight()) {
+    return;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if ((right->IsAdd() || right->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
+    return;
+  }
+  if ((left->IsAdd() || left->IsSub()) &&
+      TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
+    return;
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+
+  if (input_type == result_type) {
+    // We let the arch-independent code handle this.
+    return;
+  }
+
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
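
// NOTE (editor): What the shifter-operand merge buys at the ISA level: A64
// data-processing instructions accept a shifted or extended register as their
// second operand, so a single-use shift folds into its consumer. A sketch of the
// fused semantics (the assembly in the comments is illustrative, not output of
// this pass):

```cpp
#include <cassert>
#include <cstdint>

// Before the merge:        After the merge:
//   lsl w2, w2, #2           add w0, w1, w2, lsl #2
//   add w0, w1, w2
int32_t AddWithLslOperand(int32_t rn, int32_t rm, int shift) {
  return rn + (rm << shift);  // one data-processing instruction on A64
}

int main() {
  assert(AddWithLslOperand(3, 4, 2) == 19);
}
```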
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4b697db..b7f490b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -39,9 +39,30 @@
                                     HInstruction* array,
                                     HInstruction* index,
                                     int access_size);
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use,
+                                  HInstruction* bitfield_op,
+                                  bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, true);
+  }
 
+  bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+                                           HBinaryOperation* input_binop,
+                                           HInstruction* input_other);
+
+  // HInstruction visitors, sorted alphabetically.
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
+  void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dbe7524..8340811 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -89,10 +89,7 @@
   }
 }
 
-static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips) {
-    return Intrinsics::kNone;
-  }
+static Intrinsics GetIntrinsic(InlineMethod method) {
   switch (method.opcode) {
     // Floating-point conversions.
     case kIntrinsicDoubleCvt:
@@ -387,7 +384,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
@@ -431,7 +428,7 @@
         DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
         DCHECK(inliner != nullptr);
         if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
-          Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet());
+          Intrinsics intrinsic = GetIntrinsic(method);
 
           if (intrinsic != Intrinsics::kNone) {
             if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0a5acc3..d2017da 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -44,7 +44,23 @@
 bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ assembler->
@@ -662,20 +678,23 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   ArmAssembler* assembler = codegen->GetAssembler();
-  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();             // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
 
   if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
     __ add(IP, base, ShifterOperand(offset));
     if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
-      Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
       __ ldrexd(trg_lo, trg_hi, IP);
     } else {
       __ ldrd(trg_lo, Address(IP));
     }
   } else {
-    Register trg = locations->Out().AsRegister<Register>();
+    Register trg = trg_loc.AsRegister<Register>();
     __ ldr(trg, Address(base, offset));
   }
 
@@ -684,14 +703,18 @@
   }
 
   if (type == Primitive::kPrimNot) {
-    Register trg = locations->Out().AsRegister<Register>();
-    __ MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -936,6 +959,7 @@
   __ Bind(&loop_head);
 
   __ ldrex(tmp_lo, tmp_ptr);
+  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
 
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
 
@@ -964,7 +988,11 @@
   // The UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
   // off temporarily as a quick fix.
+  //
   // TODO(rpl): Fix it and turn it back on.
+  //
+  // TODO(rpl): Also, we should investigate whether we need a read
+  // barrier in the generated code.
   if (kPoisonHeapReferences) {
     return;
   }
@@ -1400,6 +1428,10 @@
   }
 }
 
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) because we do not
+// intrinsify methods that can go into the IntrinsicSlowPathARM
+// slow path.
 void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
   ArmAssembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 059abf0..b04dcce 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -143,7 +143,23 @@
 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM64 slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ masm->
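Note: `CanCall()` is true whenever the summary's call kind is not kNoCall, so the bail-out above catches exactly the intrinsics whose locations were built with kCallOnSlowPath (e.g. UnsafeGetObject under read barriers). A hypothetical caller-side sketch of the fallback:

    // Illustrative only: when TryDispatch() returns false, the invoke's
    // locations are reset to nullptr and the code generator later emits a
    // regular (non-intrinsified) call for it.
    if (!intrinsic_builder.TryDispatch(invoke)) {
      // Nothing to do here; the normal invoke path handles this HInvoke.
    }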
@@ -818,9 +834,12 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
-  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
-  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
-  Register trg = RegisterFrom(locations->Out(), type);
+  Location base_loc = locations->InAt(1);
+  Register base = WRegisterFrom(base_loc);      // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = XRegisterFrom(offset_loc);  // Long offset.
+  Location trg_loc = locations->Out();
+  Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
@@ -837,13 +856,18 @@
 
   if (type == Primitive::kPrimNot) {
     DCHECK(trg.IsW());
-    codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1057,6 +1081,9 @@
   if (use_acquire_release) {
     __ Bind(&loop_head);
     __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1065,6 +1092,9 @@
     __ Dmb(InnerShareable, BarrierWrites);
     __ Bind(&loop_head);
     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1090,7 +1120,11 @@
   // The UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
   // off temporarily as a quick fix.
+  //
   // TODO(rpl): Fix it and turn it back on.
+  //
+  // TODO(rpl): Also, we should investigate whether we need a read
+  // barrier in the generated code.
   if (kPoisonHeapReferences) {
     return;
   }
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5efcf4e..3268445 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -138,6 +138,323 @@
 
 #define __ assembler->
 
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    __ Mfc1(out_lo, in);
+    __ Mfhc1(out_hi, in);
+  } else {
+    Register out = locations->Out().AsRegister<Register>();
+
+    __ Mfc1(out, in);
+  }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+  FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    __ Mtc1(in_lo, out);
+    __ Mthc1(in_hi, out);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    __ Mtc1(in, out);
+  }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
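Note: the Mtc1/Mthc1 (and Mfc1/Mfhc1) pairs move the low and high 32-bit halves between a core register pair and one 64-bit FPU register. A portable sketch of the same bit-for-bit move (illustrative, not the emitted code):

    #include <cstdint>
    #include <cstring>

    double LongBitsToDouble(int64_t bits) {
      double result;
      std::memcpy(&result, &bits, sizeof result);  // raw bit copy, no int-to-FP conversion
      return result;
    }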
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type type,
+                            MipsAssembler* assembler,
+                            bool isR2OrNewer) {
+  DCHECK(type == Primitive::kPrimShort ||
+         type == Primitive::kPrimInt ||
+         type == Primitive::kPrimLong);
+
+  if (type == Primitive::kPrimShort) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Wsbh(out, in);
+      __ Seh(out, out);
+    } else {
+      __ Sll(TMP, in, 24);
+      __ Sra(TMP, TMP, 16);
+      __ Sll(out, in, 16);
+      __ Srl(out, out, 24);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimInt) {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(out, in, 16);
+      __ Wsbh(out, out);
+    } else {
+      // MIPS32r1
+      // __ Rotr(out, in, 16);
+      __ Sll(TMP, in, 16);
+      __ Srl(out, in, 16);
+      __ Or(out, out, TMP);
+      // __ Wsbh(out, out);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(TMP, out, AT);
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out, out, 8);
+      __ And(out, out, AT);
+      __ Or(out, out, TMP);
+    }
+  } else if (type == Primitive::kPrimLong) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+    Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+    if (isR2OrNewer) {
+      __ Rotr(AT, in_hi, 16);
+      __ Rotr(TMP, in_lo, 16);
+      __ Wsbh(out_lo, AT);
+      __ Wsbh(out_hi, TMP);
+    } else {
+      // When calling CreateIntToIntLocations() we promised that the
+      // use of out_lo/out_hi wouldn't overlap with the use of
+      // in_lo/in_hi. Be very careful not to write to out_lo/out_hi
+      // until we're completely done reading from in_lo/in_hi.
+      // __ Rotr(TMP, in_lo, 16);
+      __ Sll(TMP, in_lo, 16);
+      __ Srl(AT, in_lo, 16);
+      __ Or(TMP, TMP, AT);             // Hold in TMP until it's safe
+                                       // to write to out_hi.
+      // __ Rotr(out_lo, in_hi, 16);
+      __ Sll(AT, in_hi, 16);
+      __ Srl(out_lo, in_hi, 16);        // Here we are finally done reading
+                                        // from in_lo/in_hi so it's okay to
+                                        // write to out_lo/out_hi.
+      __ Or(out_lo, out_lo, AT);
+      // __ Wsbh(out_hi, out_hi);
+      __ LoadConst32(AT, 0x00FF00FF);
+      __ And(out_hi, TMP, AT);
+      __ Sll(out_hi, out_hi, 8);
+      __ Srl(TMP, TMP, 8);
+      __ And(TMP, TMP, AT);
+      __ Or(out_hi, out_hi, TMP);
+      // __ Wsbh(out_lo, out_lo);
+      __ And(TMP, out_lo, AT);  // AT already holds the correct mask value
+      __ Sll(TMP, TMP, 8);
+      __ Srl(out_lo, out_lo, 8);
+      __ And(out_lo, out_lo, AT);
+      __ Or(out_lo, out_lo, TMP);
+    }
+  }
+}
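Note: the MIPS32r1 fallback open-codes Rotr and Wsbh with shifts and masks. A portable sketch of the 32-bit case (illustrative), e.g. 0x12345678 -> 0x78563412:

    #include <cstdint>

    uint32_t ReverseBytes32(uint32_t in) {
      uint32_t x = (in << 16) | (in >> 16);  // emulated Rotr(x, 16): swap half-words
      return ((x & 0x00FF00FF) << 8) |       // emulated Wsbh: swap the bytes
             ((x >> 8) & 0x00FF00FF);        // within each half-word
    }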
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimInt,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimLong,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(),
+                  Primitive::kPrimShort,
+                  GetAssembler(),
+                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  MipsLabel loop;
+  MipsLabel end;
+  MipsLabel return_true;
+  MipsLabel return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst32(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqz(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beq(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bne(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bne(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqz(temp1, &return_true);
+
+  // Don't overwrite the input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 2 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded.
+  __ Bind(&loop);
+  __ Lw(out, TMP, value_offset);
+  __ Lw(temp2, temp3, value_offset);
+  __ Bne(out, temp2, &return_false);
+  __ Addiu(TMP, TMP, 4);
+  __ Addiu(temp3, temp3, 4);
+  __ Addiu(temp1, temp1, -2);
+  __ Bgtz(temp1, &loop);
+
+  // Return true and exit the function.
+  // If the loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst32(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst32(out, 0);
+  __ Bind(&end);
+}
+
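Note: a condensed, portable sketch of the check sequence VisitStringEquals emits (illustrative; the struct is a hypothetical stand-in for mirror::String, and the loop mirrors the two-chars-per-Lw word compares):

    #include <cstdint>
    #include <cstring>

    struct StringObj {        // hypothetical stand-in for mirror::String
      void* klass;            // class_offset
      int32_t count;          // count_offset
      uint16_t value[8];      // value_offset; assumed zero-padded to a 4-byte multiple
    };

    bool StringEquals(const StringObj* str, const StringObj* arg) {
      if (str == arg) return true;                  // same reference
      if (arg == nullptr) return false;             // null argument
      if (str->klass != arg->klass) return false;   // argument is not a String
      if (str->count != arg->count) return false;   // different lengths
      for (int32_t i = 0; i < str->count; i += 2) { // two chars per 32-bit word
        uint32_t a, b;
        std::memcpy(&a, &str->value[i], sizeof a);
        std::memcpy(&b, &arg->value[i], sizeof b);
        if (a != b) return false;                   // zero padding keeps the final,
      }                                             // partial word comparable
      return true;
    }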
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -148,15 +465,8 @@
 
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat)
-UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble)
-UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits)
-UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits)
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
@@ -204,7 +514,6 @@
 UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(StringCharAt)
 UNIMPLEMENTED_INTRINSIC(StringCompareTo)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 05c7eb0..ecee11d 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -101,11 +101,10 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(A0));
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
-      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
-      UNREACHABLE();
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
     }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
@@ -116,7 +115,7 @@
     }
 
     RestoreLiveRegisters(codegen, invoke_->GetLocations());
-    __ B(GetExitLabel());
+    __ Bc(GetExitLabel());
   }
 
   const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
@@ -807,7 +806,7 @@
 
   DCHECK_NE(in, out);
 
-  Label done;
+  Mips64Label done;
 
   // double floor/ceil(double in) {
   //     if in.isNaN || in.isInfinite || in.isZero {
@@ -1257,7 +1256,7 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  Label loop_head, exit_loop;
+  Mips64Label loop_head, exit_loop;
   __ Daddu(TMP, base, offset);
   __ Sync(0);
   __ Bind(&loop_head);
@@ -1392,6 +1391,108 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
+  Mips64Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>();
+  GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+  GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
+  GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+  Mips64Label loop;
+  Mips64Label end;
+  Mips64Label return_true;
+  Mips64Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+  if (str == arg) {
+    __ LoadConst64(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqzc(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beqc(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bnec(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bnec(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqzc(temp1, &return_true);
+
+  // Don't overwrite the input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded to be 8-byte aligned.
+  __ Bind(&loop);
+  __ Ld(out, TMP, value_offset);
+  __ Ld(temp2, temp3, value_offset);
+  __ Bnec(out, temp2, &return_false);
+  __ Daddiu(TMP, TMP, 8);
+  __ Daddiu(temp3, temp3, 8);
+  __ Addiu(temp1, temp1, -4);
+  __ Bgtzc(temp1, &loop);
+
+  // Return true and exit the function.
+  // If the loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst64(out, 1);
+  __ Bc(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst64(out, 0);
+  __ Bind(&end);
+}
+
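Note: the `Addiu(temp1, temp1, -4)` step relies on the zero padding asserted above when the length is not a multiple of four. An illustrative trace for an assumed count of 5:

    // count = 5:
    //   iteration 1: Ld compares chars 0..3; temp1: 5 -> 1 (Bgtzc taken)
    //   iteration 2: Ld compares chars 4..7; temp1: 1 -> -3 (loop exits)
    // Chars 5..7 are zero padding in both operands, so the extra
    // comparison can never produce a false mismatch.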
 static void GenerateStringIndexOf(HInvoke* invoke,
                                   Mips64Assembler* assembler,
                                   CodeGeneratorMIPS64* codegen,
@@ -1413,7 +1514,7 @@
       // full slow-path down and branch unconditionally.
       slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
       codegen->AddSlowPath(slow_path);
-      __ B(slow_path->GetEntryLabel());
+      __ Bc(slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
       return;
     }
@@ -1587,8 +1688,6 @@
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
 
-UNIMPLEMENTED_INTRINSIC(StringEquals)
-
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 040bf6a..371588f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -55,7 +55,23 @@
 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathX86 slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathX86 for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -1571,26 +1587,32 @@
   GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
 }
 
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
-                         bool is_volatile, X86Assembler* assembler) {
-  Register base = locations->InAt(1).AsRegister<Register>();
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
-  Location output = locations->Out();
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorX86* codegen) {
+  X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();
+  Location output_loc = locations->Out();
 
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register output_reg = output.AsRegister<Register>();
-      __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      Register output = output_loc.AsRegister<Register>();
+      __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
       if (type == Primitive::kPrimNot) {
-        __ MaybeUnpoisonHeapReference(output_reg);
+        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-        Register output_lo = output.AsRegisterPairLow<Register>();
-        Register output_hi = output.AsRegisterPairHigh<Register>();
+        Register output_lo = output_loc.AsRegisterPairLow<Register>();
+        Register output_hi = output_loc.AsRegisterPairHigh<Register>();
         if (is_volatile) {
           // Need to use a XMM to read atomically.
           XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
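Note: the XMM temporary matters because two 32-bit loads could observe a torn 64-bit value on x86-32, while a single aligned 8-byte SSE load is atomic (the assumption behind the comment above). std::atomic expresses the same contract portably; an illustrative sketch, not the emitted code:

    #include <atomic>
    #include <cstdint>

    int64_t VolatileLongRead(const std::atomic<int64_t>* addr) {
      return addr->load(std::memory_order_seq_cst);  // one atomic 64-bit read
    }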
@@ -1613,8 +1635,13 @@
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
                                           bool is_long, bool is_volatile) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1653,22 +1680,22 @@
 
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
 }
 
 
@@ -1890,13 +1917,18 @@
 
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
+    // In the case of the `UnsafeCASObject` intrinsic, accessing an
+    // object in the heap with LOCK CMPXCHG does not require a read
+    // barrier, as we do not keep a reference to this heap location.
+    // However, if heap poisoning is enabled, we need to unpoison the
+    // values that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value` has been moved to a temporary register, no need to
@@ -1929,8 +1961,8 @@
       LOG(FATAL) << "Unexpected CAS type " << type;
     }
 
-    // locked cmpxchg has full barrier semantics, and we don't need
-    // scheduling barriers at this time.
+    // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
+    // don't need scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setb(kZero, out.AsRegister<Register>());
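Note: the setb/movzxb pair converts ZF into the intrinsic's boolean result. A portable sketch of the CAS contract this implements (illustrative):

    #include <atomic>
    #include <cstdint>

    bool Cas32(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
      // compare_exchange_strong typically lowers to LOCK CMPXCHG on x86;
      // like the emitted code, it is a full barrier, so no extra fences.
      return addr->compare_exchange_strong(expected, value);
    }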
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 14c65c9..2d9f01b 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -50,8 +50,24 @@
 
 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
-  const LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  LocationSummary* res = invoke->GetLocations();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathX86_64 slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathX86_64 for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -917,6 +933,10 @@
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) when read barriers are
+// enabled, because we do not intrinsify methods that can go into the
+// IntrinsicSlowPathX86_64 slow path.
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
   X86_64Assembler* assembler = GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
@@ -1605,7 +1625,7 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
+  locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
 }
 
 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
@@ -1698,23 +1718,30 @@
   GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
 }
 
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
-                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
-  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
-  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
-  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile ATTRIBUTE_UNUSED,
+                         CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+  Location base_loc = locations->InAt(1);
+  CpuRegister base = base_loc.AsRegister<CpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
+  Location output_loc = locations->Out();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
 
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
       if (type == Primitive::kPrimNot) {
-        __ MaybeUnpoisonHeapReference(trg);
+        codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
       }
       break;
 
     case Primitive::kPrimLong:
-      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
       break;
 
     default:
@@ -1724,8 +1751,13 @@
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1754,22 +1786,22 @@
 
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
 }
 
 
@@ -1961,13 +1993,18 @@
 
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
     __ setcc(kZero, out);
     __ movzxb(out, out);
 
+    // In the case of the `UnsafeCASObject` intrinsic, accessing an
+    // object in the heap with LOCK CMPXCHG does not require a read
+    // barrier, as we do not keep a reference to this heap location.
+    // However, if heap poisoning is enabled, we need to unpoison the
+    // values that were poisoned earlier.
     if (kPoisonHeapReferences) {
       if (base_equals_value) {
         // `value_reg` has been moved to a temporary register, no need
@@ -1992,7 +2029,7 @@
       LOG(FATAL) << "Unexpected CAS type " << type;
     }
 
-    // locked cmpxchg has full barrier semantics, and we don't need
+    // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
     // Convert ZF into the boolean result.
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 47457de..2bb769a 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -42,12 +42,14 @@
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     loop_header_ = new (&allocator_) HBasicBlock(graph_);
     loop_body_ = new (&allocator_) HBasicBlock(graph_);
+    return_ = new (&allocator_) HBasicBlock(graph_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
 
     graph_->AddBlock(entry_);
     graph_->AddBlock(loop_preheader_);
     graph_->AddBlock(loop_header_);
     graph_->AddBlock(loop_body_);
+    graph_->AddBlock(return_);
     graph_->AddBlock(exit_);
 
     graph_->SetEntryBlock(entry_);
@@ -57,8 +59,9 @@
     entry_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header_);
     loop_header_->AddSuccessor(loop_body_);
-    loop_header_->AddSuccessor(exit_);
+    loop_header_->AddSuccessor(return_);
     loop_body_->AddSuccessor(loop_header_);
+    return_->AddSuccessor(exit_);
 
     // Provide boiler-plate instructions.
     parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
@@ -89,6 +92,7 @@
   HBasicBlock* loop_preheader_;
   HBasicBlock* loop_header_;
   HBasicBlock* loop_body_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
 
   HInstruction* parameter_;  // "this"
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 6fbb682..5b89cfe 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,19 +119,10 @@
       : ref_info_(ref_info),
         offset_(offset),
         index_(index),
-        declaring_class_def_index_(declaring_class_def_index),
-        may_become_unknown_(true) {
+        declaring_class_def_index_(declaring_class_def_index) {
     DCHECK(ref_info != nullptr);
     DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
            (offset != kInvalidFieldOffset && index == nullptr));
-
-    if (ref_info->IsSingletonAndNotReturned()) {
-      // We try to track stores to singletons that aren't returned to eliminate the stores
-      // since values in singleton's fields cannot be killed due to aliasing. Those values
-      // can still be killed due to merging values since we don't build phi for merging heap
-      // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible.
-      may_become_unknown_ = false;
-    }
   }
 
   ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -148,21 +139,11 @@
     return index_ != nullptr;
   }
 
-  // Returns true if this heap location's value may become unknown after it's
-  // set to a value, due to merge of values, or killed due to aliasing.
-  bool MayBecomeUnknown() const {
-    return may_become_unknown_;
-  }
-  void SetMayBecomeUnknown(bool val) {
-    may_become_unknown_ = val;
-  }
-
  private:
   ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
   const size_t offset_;                // offset of static/instance field.
   HInstruction* const index_;          // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
-  bool may_become_unknown_;            // value may become kUnknownHeapValue.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -381,26 +362,13 @@
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* field_access,
-                        HInstruction* ref,
-                        const FieldInfo& field_info,
-                        bool is_store) {
+  void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
     if (field_info.IsVolatile()) {
       has_volatile_ = true;
     }
     const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
     const size_t offset = field_info.GetFieldOffset().SizeValue();
-    HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
-    // A store of a value may be eliminated if all future loads for that value can be eliminated.
-    // For a value that's stored into a singleton field, the value will not be killed due
-    // to aliasing. However if the value is set in a block that doesn't post dominate the definition,
-    // the value may be killed due to merging later. Before we have post dominating info, we check
-    // if the store is in the same block as the definition just to be conservative.
-    if (is_store &&
-        location->GetReferenceInfo()->IsSingletonAndNotReturned() &&
-        field_access->GetBlock() != ref->GetBlock()) {
-      location->SetMayBecomeUnknown(true);
-    }
+    GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -409,20 +377,20 @@
   }
 
   void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
   }
 
   void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
   }
 
@@ -464,9 +432,14 @@
 };
 
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
+// A heap location can be set to kUnknownHeapValue when:
+// - it is initialized (kUnknownHeapValue is every location's starting value).
+// - its value is killed due to aliasing, merging, invocation, or loop side effects.
 static HInstruction* const kUnknownHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1));
+
 // Default heap value after an allocation.
+// A heap location can be set to that value right after an allocation.
 static HInstruction* const kDefaultHeapValue =
     reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2));
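Note: the two sentinels are pointer-typed constants carved out of addresses no real HInstruction can occupy, so the per-location lattice is {unknown, default, any real instruction} without a separate tag field. A hypothetical helper shows how the lattice would be queried:

    // Illustrative only; not part of the patch.
    static bool IsRealHeapValue(HInstruction* value) {
      return value != kUnknownHeapValue && value != kDefaultHeapValue;
    }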
 
@@ -484,29 +457,17 @@
                                                     kUnknownHeapValue,
                                                     graph->GetArena()->Adapter(kArenaAllocLSE)),
                          graph->GetArena()->Adapter(kArenaAllocLSE)),
-        removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
-    int block_id = block->GetBlockId();
-    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    // Populate the heap_values array for this block.
     // TODO: try to reuse the heap_values array from one predecessor if possible.
     if (block->IsLoopHeader()) {
-      // We do a single pass in reverse post order. For loops, use the side effects as a hint
-      // to see if the heap values should be killed.
-      if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-        // Leave all values as kUnknownHeapValue.
-      } else {
-        // Inherit the values from pre-header.
-        HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
-        ArenaVector<HInstruction*>& pre_header_heap_values =
-            heap_values_for_[pre_header->GetBlockId()];
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          heap_values[i] = pre_header_heap_values[i];
-        }
-      }
+      HandleLoopSideEffects(block);
     } else {
       MergePredecessorValues(block);
     }
@@ -515,23 +476,34 @@
 
   // Remove recorded instructions that should be eliminated.
   void RemoveInstructions() {
-    size_t size = removed_instructions_.size();
-    DCHECK_EQ(size, substitute_instructions_.size());
+    size_t size = removed_loads_.size();
+    DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
-      HInstruction* instruction = removed_instructions_[i];
-      DCHECK(instruction != nullptr);
-      HInstruction* substitute = substitute_instructions_[i];
-      if (substitute != nullptr) {
-        // Keep tracing substitute till one that's not removed.
-        HInstruction* sub_sub = FindSubstitute(substitute);
-        while (sub_sub != substitute) {
-          substitute = sub_sub;
-          sub_sub = FindSubstitute(substitute);
-        }
-        instruction->ReplaceWith(substitute);
+      HInstruction* load = removed_loads_[i];
+      DCHECK(load != nullptr);
+      DCHECK(load->IsInstanceFieldGet() ||
+             load->IsStaticFieldGet() ||
+             load->IsArrayGet());
+      HInstruction* substitute = substitute_instructions_for_loads_[i];
+      DCHECK(substitute != nullptr);
+      // Keep tracing substitute till one that's not removed.
+      HInstruction* sub_sub = FindSubstitute(substitute);
+      while (sub_sub != substitute) {
+        substitute = sub_sub;
+        sub_sub = FindSubstitute(substitute);
       }
-      instruction->GetBlock()->RemoveInstruction(instruction);
+      load->ReplaceWith(substitute);
+      load->GetBlock()->RemoveInstruction(load);
     }
+
+    // At this point, stores in possibly_removed_stores_ can be safely removed.
+    size = possibly_removed_stores_.size();
+    for (size_t i = 0; i < size; i++) {
+      HInstruction* store = possibly_removed_stores_[i];
+      DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+      store->GetBlock()->RemoveInstruction(store);
+    }
+
     // TODO: remove unnecessary allocations.
     // Eliminate instructions in singleton_new_instances_ that:
     // - don't have uses,
@@ -541,6 +513,52 @@
   }
 
  private:
+  // If heap_values[index] is an instance field store, we need to keep the store.
+  // This is necessary if a heap value is killed due to merging, or loop side
+  // effects (which is essentially merging as well), since a later load from the
+  // location won't be eliminated.
+  void KeepIfIsStore(HInstruction* heap_value) {
+    if (heap_value == kDefaultHeapValue ||
+        heap_value == kUnknownHeapValue ||
+        !heap_value->IsInstanceFieldSet()) {
+      return;
+    }
+    auto idx = std::find(possibly_removed_stores_.begin(),
+        possibly_removed_stores_.end(), heap_value);
+    if (idx != possibly_removed_stores_.end()) {
+      // Make sure the store is kept.
+      possibly_removed_stores_.erase(idx);
+    }
+  }
+
+  void HandleLoopSideEffects(HBasicBlock* block) {
+    DCHECK(block->IsLoopHeader());
+    int block_id = block->GetBlockId();
+    ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+    HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+    ArenaVector<HInstruction*>& pre_header_heap_values =
+        heap_values_for_[pre_header->GetBlockId()];
+    // We do a single pass in reverse post order. For loops, use the side effects as a hint
+    // to see if the heap values should be killed.
+    if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
+      for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
+        // The heap value is killed by loop side effects; we need to keep the last store.
+        KeepIfIsStore(pre_header_heap_values[i]);
+      }
+      if (kIsDebugBuild) {
+        // heap_values should all still be the kUnknownHeapValue they were initialized with.
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+        }
+      }
+    } else {
+      // Inherit the values from pre-header.
+      for (size_t i = 0; i < heap_values.size(); i++) {
+        heap_values[i] = pre_header_heap_values[i];
+      }
+    }
+  }
+
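Note: a worked example of why HandleLoopSideEffects() must call KeepIfIsStore(), written as a hypothetical C++ analogue of the Java shape:

    struct Obj { int f = 0; };       // stands in for a non-escaping singleton

    int LoopExample(bool run_loop) {
      Obj o;
      o.f = 1;                       // provisionally removable store
      while (run_loop) {             // the loop writes o.f, killing the
        o.f = 2;                     // pre-header heap value
        run_loop = false;
      }
      return o.f;                    // may read 1 or 2, so the store of 1 must
    }                                // be pulled back out of possibly_removed_stores_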
   void MergePredecessorValues(HBasicBlock* block) {
     const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
     if (predecessors.size() == 0) {
@@ -548,16 +566,25 @@
     }
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      if (value != kUnknownHeapValue) {
+      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
+      heap_values[i] = pred0_value;
+      if (pred0_value != kUnknownHeapValue) {
         for (size_t j = 1; j < predecessors.size(); j++) {
-          if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) {
-            value = kUnknownHeapValue;
+          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
+          if (pred_value != pred0_value) {
+            heap_values[i] = kUnknownHeapValue;
             break;
           }
         }
       }
-      heap_values[i] = value;
+
+      if (heap_values[i] == kUnknownHeapValue) {
+        // Keep the last store in each predecessor since future loads cannot be eliminated.
+        for (size_t j = 0; j < predecessors.size(); j++) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+          KeepIfIsStore(pred_values[i]);
+        }
+      }
     }
   }
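Note: a matching example for MergePredecessorValues() (hypothetical shape): when the predecessors disagree, the merged value becomes kUnknownHeapValue and each predecessor's last store has to be kept:

    struct Obj2 { int f = 0; };      // stands in for a non-escaping singleton

    int MergeExample(bool c) {
      Obj2 o;
      if (c) {
        o.f = 1;                     // kept: value is killed at the merge
      } else {
        o.f = 2;                     // kept: value is killed at the merge
      }
      return o.f;                    // heap value is unknown here
    }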
 
@@ -616,21 +643,30 @@
     HInstruction* heap_value = heap_values[idx];
     if (heap_value == kDefaultHeapValue) {
       HInstruction* constant = GetDefaultValue(instruction->GetType());
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(constant);
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(constant);
       heap_values[idx] = constant;
       return;
     }
+    if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
+      HInstruction* store = heap_value;
+      // This load must be from a singleton since it reads the same field into
+      // which a "removed" store put the value; such stores are only recorded
+      // for singletons' fields.
+      DCHECK(ref_info->IsSingleton());
+      // Get the real heap value of the store.
+      heap_value = store->InputAt(1);
+    }
     if ((heap_value != kUnknownHeapValue) &&
         // Keep the load due to possible I/F, J/D array aliasing.
         // See b/22538329 for details.
         (heap_value->GetType() == instruction->GetType())) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(heap_value);
+      removed_loads_.push_back(instruction);
+      substitute_instructions_for_loads_.push_back(heap_value);
       TryRemovingNullCheck(instruction);
       return;
     }
 
+    // The load isn't eliminated.
     if (heap_value == kUnknownHeapValue) {
       // Put the load as the value into the HeapLocation.
       // This acts like GVN but with better aliasing analysis.
@@ -662,51 +698,63 @@
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[instruction->GetBlock()->GetBlockId()];
     HInstruction* heap_value = heap_values[idx];
-    bool redundant_store = false;
+    bool same_value = false;
+    bool possibly_redundant = false;
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
-      redundant_store = true;
+      same_value = true;
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
     } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned() &&
-               !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) {
-      // Store into a field of a singleton that's not returned. And that value cannot be
-      // killed due to merge. It's redundant since future loads will get the value
-      // set by this instruction.
-      Primitive::Type type = Primitive::kPrimVoid;
-      if (instruction->IsInstanceFieldSet()) {
-        type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType();
-      } else if (instruction->IsStaticFieldSet()) {
-        type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType();
+               ref_info->IsSingletonAndNotReturned()) {
+      // Store into a field of a singleton that's not returned. The value cannot be
+      // killed due to aliasing/invocation. It can be redundant since future loads can
+      // directly get the value set by this instruction. The value can still be killed due to
+      // merging or loop side effects. Stores whose values are killed that way are
+      // later pulled back out of possibly_removed_stores_ (i.e. kept) once that is detected.
+      possibly_redundant = true;
+      HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
+      DCHECK(new_instance != nullptr);
+      if (new_instance->IsFinalizable()) {
+        // Finalizable objects escape globally. Need to keep the store.
+        possibly_redundant = false;
       } else {
-        DCHECK(false) << "Must be an instance/static field set instruction.";
+        HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+        if (loop_info != nullptr) {
+          // `instruction` is a store in the loop, so the loop must do writes.
+          DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+
+          if (loop_info->IsLoopInvariant(original_ref, false)) {
+            DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
+            // Keep the store since its value may be needed at the loop header.
+            possibly_redundant = false;
+          } else {
+            // The singleton is created inside the loop. The value stored into it
+            // isn't needed at the loop header. This also holds for any outer loop.
+          }
+        }
       }
-      if (value->GetType() != type) {
-        // I/F, J/D aliasing should not happen for fields.
-        DCHECK(Primitive::IsIntegralType(value->GetType()));
-        DCHECK(!Primitive::Is64BitType(value->GetType()));
-        DCHECK(Primitive::IsIntegralType(type));
-        DCHECK(!Primitive::Is64BitType(type));
-        // Keep the store since the corresponding load isn't eliminated due to different types.
-        // TODO: handle the different int types so that we can eliminate this store.
-        redundant_store = false;
-      } else {
-        redundant_store = true;
-      }
-      // TODO: eliminate the store if the singleton object is not finalizable.
-      redundant_store = false;
     }
-    if (redundant_store) {
-      removed_instructions_.push_back(instruction);
-      substitute_instructions_.push_back(nullptr);
-      TryRemovingNullCheck(instruction);
+    if (same_value || possibly_redundant) {
+      possibly_removed_stores_.push_back(instruction);
     }
 
-    heap_values[idx] = value;
+    if (!same_value) {
+      if (possibly_redundant) {
+        DCHECK(instruction->IsInstanceFieldSet());
+        // Put the store as the heap value. If a later load reads this value from
+        // the heap, this store isn't really redundant.
+        heap_values[idx] = instruction;
+      } else {
+        heap_values[idx] = value;
+      }
+    }
     // This store may kill values in other heap locations due to aliasing.
     for (size_t i = 0; i < heap_values.size(); i++) {
+      if (i == idx) {
+        continue;
+      }
       if (heap_values[i] == value) {
         // Same value should be kept even if aliasing happens.
         continue;
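
For context, the shape of code this pass targets can be sketched with a C++
analogue (illustrative only; LSE runs on ART's HIR built from dex code, and
`Point`/`Sum` are made-up names):

#include <cstdio>

struct Point { int x; int y; };  // Stand-in for a non-escaping Java object.

int Sum() {
  Point p;           // "Singleton": freshly allocated, never escapes, not returned.
  p.x = 1;           // Store; later loads can read the stored value directly...
  p.y = 2;
  return p.x + p.y;  // ...so the loads fold to 1 + 2 and both stores become dead.
}

int main() {
  std::printf("%d\n", Sum());  // Prints 3.
  return 0;
}
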
@@ -834,9 +882,10 @@
       return;
     }
     if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned()) {
-      // The allocation might be eliminated.
-      singleton_new_instances_.push_back(new_instance);
+        ref_info->IsSingletonAndNotReturned() &&
+        !new_instance->IsFinalizable() &&
+        !new_instance->CanThrow()) {
+      // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination.
     }
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[new_instance->GetBlock()->GetBlockId()];
@@ -854,10 +903,10 @@
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
-    size_t size = removed_instructions_.size();
+    size_t size = removed_loads_.size();
     for (size_t i = 0; i < size; i++) {
-      if (removed_instructions_[i] == instruction) {
-        return substitute_instructions_[i];
+      if (removed_loads_[i] == instruction) {
+        return substitute_instructions_for_loads_[i];
       }
     }
     return instruction;
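
A minimal sketch of the parallel-vector substitution scheme used above
(FindSubstitute's linear search with an identity fallback); `Instr` is a
placeholder type, not an ART class:

#include <cassert>
#include <cstddef>
#include <vector>

struct Instr {};  // Stand-in for HInstruction.

struct SubstitutionMap {
  std::vector<Instr*> removed_loads;
  std::vector<Instr*> substitutes;  // Parallel to removed_loads.

  // Mirrors LSEVisitor::FindSubstitute(): linear search, identity fallback.
  Instr* FindSubstitute(Instr* instruction) const {
    for (size_t i = 0; i < removed_loads.size(); ++i) {
      if (removed_loads[i] == instruction) {
        return substitutes[i];
      }
    }
    return instruction;
  }
};

int main() {
  Instr load, constant, other;
  SubstitutionMap map;
  map.removed_loads.push_back(&load);
  map.substitutes.push_back(&constant);
  assert(map.FindSubstitute(&load) == &constant);
  assert(map.FindSubstitute(&other) == &other);  // Not removed: unchanged.
  return 0;
}
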
@@ -871,8 +920,13 @@
 
   // We record the instructions that should be eliminated but may be
   // used by heap locations. They'll be removed in the end.
-  ArenaVector<HInstruction*> removed_instructions_;
-  ArenaVector<HInstruction*> substitute_instructions_;
+  ArenaVector<HInstruction*> removed_loads_;
+  ArenaVector<HInstruction*> substitute_instructions_for_loads_;
+
+  // Stores in this list may later be removed from the list when it is
+  // found that the store cannot be eliminated.
+  ArenaVector<HInstruction*> possibly_removed_stores_;
+
   ArenaVector<HInstruction*> singleton_new_instances_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ebdf7a2..1ab206f 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -17,6 +17,7 @@
 #include "locations.h"
 
 #include "nodes.h"
+#include "code_generator.h"
 
 namespace art {
 
@@ -47,18 +48,26 @@
       : Location::RequiresRegister();
 }
 
-Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
-  if (instruction->IsIntConstant() || instruction->IsNullConstant()) {
-    return Location::ConstantLocation(instruction->AsConstant());
-  } else if (instruction->IsLongConstant()) {
-    // Does the long constant fit in a 32 bit int?
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    return IsInt<32>(value)
-        ? Location::ConstantLocation(instruction->AsConstant())
-        : Location::RequiresRegister();
-  } else {
-    return Location::RequiresRegister();
+Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
   }
+  return Location::RequiresRegister();
+}
+
+Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
+  }
+  return Location::RequiresFpuRegister();
 }
 
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
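
The "fits in 32 bits" check above can be illustrated with a minimal stand-in
for IsInt<32>() (assuming two's complement; ART's actual implementation lives
in base/bit_utils.h and is not shown in this diff):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Does `value` fit in a kBits-bit signed integer? (Valid for kBits < 64.)
template <size_t kBits>
constexpr bool IsInt(int64_t value) {
  return value >= -(INT64_C(1) << (kBits - 1)) &&
         value < (INT64_C(1) << (kBits - 1));
}

int main() {
  assert(IsInt<32>(INT64_C(0x7fffffff)));    // INT32_MAX fits.
  assert(!IsInt<32>(INT64_C(0x80000000)));   // INT32_MAX + 1 does not.
  assert(IsInt<32>(-INT64_C(0x80000000)));   // INT32_MIN fits.
  return 0;
}
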
@@ -67,6 +76,12 @@
       : Location::RegisterLocation(reg);
 }
 
+Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresFpuRegister();
+}
+
 std::ostream& operator<<(std::ostream& os, const Location& location) {
   os << location.DebugString();
   if (location.IsRegister() || location.IsFpuRegister()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d014379..63bbc2c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -354,8 +354,10 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
-  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
+  static Location RegisterOrInt32Constant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
+  static Location FpuRegisterOrConstant(HInstruction* instruction);
+  static Location FpuRegisterOrInt32Constant(HInstruction* instruction);
 
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
@@ -592,6 +594,10 @@
     return intrinsified_;
   }
 
+  void SetIntrinsified(bool intrinsified) {
+    intrinsified_ = intrinsified;
+  }
+
  private:
   ArenaVector<Location> inputs_;
   ArenaVector<Location> temps_;
@@ -611,7 +617,7 @@
   RegisterSet live_registers_;
 
   // Whether these are locations for an intrinsified call.
-  const bool intrinsified_;
+  bool intrinsified_;
 
   ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
   ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 68fb0ac..9b26de4 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,7 @@
 #include "nodes.h"
 
 #include "code_generator.h"
+#include "common_dominator.h"
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
@@ -179,7 +180,10 @@
       if (successor->GetDominator() == nullptr) {
         successor->SetDominator(current);
       } else {
-        successor->SetDominator(FindCommonDominator(successor->GetDominator(), current));
+        // The CommonDominator can work for multiple blocks as long as the
+        // domination information doesn't change. However, since we're changing
+        // that information here, we can use the finder only for pairs of blocks.
+        successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current));
       }
 
       // Once all the forward edges have been visited, we know the immediate
@@ -194,24 +198,6 @@
   }
 }
 
-HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const {
-  ArenaBitVector visited(arena_, blocks_.size(), false);
-  // Walk the dominator tree of the first block and mark the visited blocks.
-  while (first != nullptr) {
-    visited.SetBit(first->GetBlockId());
-    first = first->GetDominator();
-  }
-  // Walk the dominator tree of the second block until a marked block is found.
-  while (second != nullptr) {
-    if (visited.IsBitSet(second->GetBlockId())) {
-      return second;
-    }
-    second = second->GetDominator();
-  }
-  LOG(ERROR) << "Could not find common dominator";
-  return nullptr;
-}
-
 void HGraph::TransformToSsa() {
   DCHECK(!reverse_post_order_.empty());
   SsaBuilder ssa_builder(this);
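
The pairwise computation that replaces the old bit-vector walk can be
sketched as follows (a minimal stand-in; the actual CommonDominator helper
lives in common_dominator.h, which is not shown in this diff):

#include <cassert>
#include <cstddef>

struct Block {
  Block* dominator = nullptr;  // Immediate dominator; nullptr for the entry block.
};

// Depth of a block in the dominator tree (the entry block has depth 0).
static size_t DomDepth(const Block* block) {
  size_t depth = 0;
  for (; block->dominator != nullptr; block = block->dominator) ++depth;
  return depth;
}

// Common dominator of a pair: equalize depths, then climb in lockstep.
static Block* CommonDominatorForPair(Block* first, Block* second) {
  size_t d1 = DomDepth(first);
  size_t d2 = DomDepth(second);
  for (; d1 > d2; --d1) first = first->dominator;
  for (; d2 > d1; --d2) second = second->dominator;
  while (first != second) {
    first = first->dominator;
    second = second->dominator;
  }
  return first;
}

int main() {
  Block entry, a, b, c;
  a.dominator = &entry;
  b.dominator = &a;
  c.dominator = &a;
  assert(CommonDominatorForPair(&b, &c) == &a);
  assert(CommonDominatorForPair(&b, &entry) == &entry);
  return 0;
}
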
@@ -335,14 +321,24 @@
       // instructions into `normal_block` and links the two blocks with a Goto.
       // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
       // leaving `catch_block` with the exceptional edges only.
+      //
       // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
-      DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
-      HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
-      for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-        if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-          --j;
+      // a move-exception instruction, as guaranteed by the verifier. However,
+      // trivially dead predecessors are ignored by the verifier and such code
+      // has not been removed at this stage. We therefore ignore the assumption
+      // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628).
+      HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
+      if (normal_block == nullptr) {
+        // Catch block is either empty or only contains a move-exception. It must
+        // therefore be dead and will be removed during initial DCE. Do nothing.
+        DCHECK(!catch_block->EndsWithControlFlowInstruction());
+      } else {
+        // Catch block was split. Re-link normal-flow edges to the new block.
+        for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
+          if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+            catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
+            --j;
+          }
         }
       }
     }
@@ -366,28 +362,45 @@
     HBasicBlock* first_predecessor = block->GetPredecessors()[0];
     DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
     const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
-    if (try_entry != nullptr) {
+    if (try_entry != nullptr &&
+        (block->GetTryCatchInformation() == nullptr ||
+         try_entry != &block->GetTryCatchInformation()->GetTryEntry())) {
+      // We are either setting try block membership for the first time or it
+      // has changed.
       block->SetTryCatchInformation(new (arena_) TryCatchInformation(*try_entry));
     }
   }
 }
 
 void HGraph::SimplifyCFG() {
-  // Simplify the CFG for future analysis, and code generation:
+  // Simplify the CFG for future analysis, and code generation:
   // (1): Split critical edges.
-  // (2): Simplify loops by having only one back edge, and one preheader.
+  // (2): Simplify loops by having only one preheader.
   // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
   // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
   for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
     HBasicBlock* block = blocks_[block_id];
     if (block == nullptr) continue;
-    if (block->NumberOfNormalSuccessors() > 1) {
-      for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
-        HBasicBlock* successor = block->GetSuccessors()[j];
+    if (block->GetSuccessors().size() > 1) {
+      // Only split normal-flow edges. We cannot split exceptional edges, as they
+      // are synthesized (they only approximate the real control flow), and we do
+      // not need to anyway: moves that would be inserted there are performed by
+      // the runtime.
+      ArrayRef<HBasicBlock* const> normal_successors = block->GetNormalSuccessors();
+      for (size_t j = 0, e = normal_successors.size(); j < e; ++j) {
+        HBasicBlock* successor = normal_successors[j];
         DCHECK(!successor->IsCatchBlock());
-        if (successor->GetPredecessors().size() > 1) {
+        if (successor == exit_block_) {
+          // Throw->TryBoundary->Exit. Special case which we do not want to split
+          // because Goto->Exit is not allowed.
+          DCHECK(block->IsSingleTryBoundary());
+          DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow());
+        } else if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
-          --j;
+          // SplitCriticalEdge could have invalidated the `normal_successors`
+          // ArrayRef. We must re-acquire it.
+          normal_successors = block->GetNormalSuccessors();
+          DCHECK_EQ(normal_successors[j]->GetSingleSuccessor(), successor);
+          DCHECK_EQ(e, normal_successors.size());
         }
       }
     }
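
The re-acquisition after SplitCriticalEdge() is the usual non-owning-view
pitfall: a view into a container is stale once the container mutates. A
minimal illustration with a raw pointer view into a std::vector (an ArrayRef
into the successor list behaves analogously):

#include <cassert>
#include <vector>

int main() {
  std::vector<int> successors = {1, 2, 3};
  const int* view = successors.data();  // Non-owning view, like an ArrayRef.
  successors.push_back(4);              // Mutation may reallocate the storage...
  view = successors.data();             // ...so the view must be re-acquired.
  assert(view[3] == 4);
  return 0;
}
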
@@ -1082,6 +1095,8 @@
     } else if (GetRight()->IsLongConstant()) {
       return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant());
     }
+  } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
+    return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
   }
   return nullptr;
 }
@@ -1162,8 +1177,61 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
@@ -1193,7 +1261,7 @@
 }
 
 HBasicBlock* HBasicBlock::CreateImmediateDominator() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
@@ -1209,6 +1277,34 @@
   return new_block;
 }
 
+HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+  DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
+
+  HInstruction* first_insn = GetFirstInstruction();
+  HInstruction* split_before = nullptr;
+
+  if (first_insn != nullptr && first_insn->IsLoadException()) {
+    // Catch block starts with a LoadException. Split the block after
+    // the StoreLocal and ClearException which must come after the load.
+    DCHECK(first_insn->GetNext()->IsStoreLocal());
+    DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
+    split_before = first_insn->GetNext()->GetNext()->GetNext();
+  } else {
+    // Catch block does not load the exception. Split at the beginning
+    // to create an empty catch block.
+    split_before = first_insn;
+  }
+
+  if (split_before == nullptr) {
+    // Catch block has no instructions after the split point (must be dead).
+    // Do not split it; signal the error by returning nullptr instead.
+    return nullptr;
+  } else {
+    return SplitBefore(split_before);
+  }
+}
+
 HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
@@ -1293,17 +1389,38 @@
   return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
 }
 
+ArrayRef<HBasicBlock* const> HBasicBlock::GetNormalSuccessors() const {
+  if (EndsWithTryBoundary()) {
+    // The normal-flow successor of HTryBoundary is always stored at index zero.
+    DCHECK_EQ(successors_[0], GetLastInstruction()->AsTryBoundary()->GetNormalFlowSuccessor());
+    return ArrayRef<HBasicBlock* const>(successors_).SubArray(0u, 1u);
+  } else {
+    // All successors of blocks not ending with TryBoundary are normal.
+    return ArrayRef<HBasicBlock* const>(successors_);
+  }
+}
+
+ArrayRef<HBasicBlock* const> HBasicBlock::GetExceptionalSuccessors() const {
+  if (EndsWithTryBoundary()) {
+    return GetLastInstruction()->AsTryBoundary()->GetExceptionHandlers();
+  } else {
+    // Blocks not ending with TryBoundary do not have exceptional successors.
+    return ArrayRef<HBasicBlock* const>();
+  }
+}
+
 bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const {
-  if (GetBlock()->GetSuccessors().size() != other.GetBlock()->GetSuccessors().size()) {
+  ArrayRef<HBasicBlock* const> handlers1 = GetExceptionHandlers();
+  ArrayRef<HBasicBlock* const> handlers2 = other.GetExceptionHandlers();
+
+  size_t length = handlers1.size();
+  if (length != handlers2.size()) {
     return false;
   }
 
   // Exception handlers need to be stored in the same order.
-  for (HExceptionHandlerIterator it1(*this), it2(other);
-       !it1.Done();
-       it1.Advance(), it2.Advance()) {
-    DCHECK(!it2.Done());
-    if (it1.Current() != it2.Current()) {
+  for (size_t i = 0; i < length; ++i) {
+    if (handlers1[i] != handlers2[i]) {
       return false;
     }
   }
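
The handler comparison relies on ArrayRef-style views; a hypothetical minimal
Span with SubArray() (standing in for utils/array_ref.h, which is not shown in
this diff) conveys the slicing used by GetNormalSuccessors() and
GetExceptionHandlers():

#include <cassert>
#include <cstddef>

// Hypothetical minimal read-only span standing in for ART's
// ArrayRef<HBasicBlock* const>.
template <typename T>
class Span {
 public:
  Span(const T* data, size_t size) : data_(data), size_(size) {}

  size_t size() const { return size_; }
  const T& operator[](size_t i) const { return data_[i]; }

  // View of `length` elements starting at `pos`.
  Span SubArray(size_t pos, size_t length) const {
    assert(pos + length <= size_);
    return Span(data_ + pos, length);
  }
  // View from `pos` to the end, e.g. the handlers at indexes >= 1.
  Span SubArray(size_t pos) const { return SubArray(pos, size_ - pos); }

 private:
  const T* data_;
  size_t size_;
};

int main() {
  int successors[] = {10, 20, 30};  // Index 0: normal flow; indexes 1+: handlers.
  Span<int> all(successors, 3u);
  assert(all.SubArray(0u, 1u).size() == 1u);  // GetNormalSuccessors() analogue.
  assert(all.SubArray(1u).size() == 2u);      // GetExceptionHandlers() analogue.
  return 0;
}
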
@@ -1356,7 +1473,7 @@
   // iteration.
   DCHECK(dominated_blocks_.empty());
 
-  // Remove the block from all loops it is included in.
+  // (1) Remove the block from all loops it is included in.
   for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
     HLoopInformation* loop_info = it.Current();
     loop_info->Remove(this);
@@ -1368,17 +1485,34 @@
     }
   }
 
-  // Disconnect the block from its predecessors and update their control-flow
-  // instructions.
+  // (2) Disconnect the block from its predecessors and update their
+  //     control-flow instructions.
   for (HBasicBlock* predecessor : predecessors_) {
     HInstruction* last_instruction = predecessor->GetLastInstruction();
+    if (last_instruction->IsTryBoundary() && !IsCatchBlock()) {
+      // This block is the only normal-flow successor of the TryBoundary which
+      // makes `predecessor` dead. Since DCE removes blocks in post order,
+      // exception handlers of this TryBoundary were already visited and any
+      // remaining handlers therefore must be live. We remove `predecessor` from
+      // their list of predecessors.
+      DCHECK_EQ(last_instruction->AsTryBoundary()->GetNormalFlowSuccessor(), this);
+      while (predecessor->GetSuccessors().size() > 1) {
+        HBasicBlock* handler = predecessor->GetSuccessors()[1];
+        DCHECK(handler->IsCatchBlock());
+        predecessor->RemoveSuccessor(handler);
+        handler->RemovePredecessor(predecessor);
+      }
+    }
+
     predecessor->RemoveSuccessor(this);
     uint32_t num_pred_successors = predecessor->GetSuccessors().size();
     if (num_pred_successors == 1u) {
       // If we have one successor after removing one, then we must have
-      // had an HIf or HPackedSwitch, as they have more than one successor.
-      // Replace those with a HGoto.
-      DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch());
+      // had an HIf, HPackedSwitch or HTryBoundary, as they have more than one
+      // successor. Replace those with a HGoto.
+      DCHECK(last_instruction->IsIf() ||
+             last_instruction->IsPackedSwitch() ||
+             (last_instruction->IsTryBoundary() && IsCatchBlock()));
       predecessor->RemoveInstruction(last_instruction);
       predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
     } else if (num_pred_successors == 0u) {
@@ -1387,15 +1521,17 @@
       // SSAChecker fails unless it is not removed during the pass too.
       predecessor->RemoveInstruction(last_instruction);
     } else {
-      // There are multiple successors left.  This must come from a HPackedSwitch
-      // and we are in the middle of removing the HPackedSwitch. Like above, leave
-      // this alone, and the SSAChecker will fail if it is not removed as well.
-      DCHECK(last_instruction->IsPackedSwitch());
+      // There are multiple successors left. The removed block might be a successor
+      // of a PackedSwitch which will be completely removed (perhaps replaced with
+      // a Goto), or we are deleting a catch block from a TryBoundary. In either
+      // case, leave `last_instruction` as is for now.
+      DCHECK(last_instruction->IsPackedSwitch() ||
+             (last_instruction->IsTryBoundary() && IsCatchBlock()));
     }
   }
   predecessors_.clear();
 
-  // Disconnect the block from its successors and update their phis.
+  // (3) Disconnect the block from its successors and update their phis.
   for (HBasicBlock* successor : successors_) {
     // Delete this block from the list of predecessors.
     size_t this_index = successor->GetPredecessorIndexOf(this);
@@ -1405,30 +1541,57 @@
     // dominator of `successor` which violates the order DCHECKed at the top.
     DCHECK(!successor->predecessors_.empty());
 
-    // Remove this block's entries in the successor's phis.
-    if (successor->predecessors_.size() == 1u) {
-      // The successor has just one predecessor left. Replace phis with the only
-      // remaining input.
-      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        HPhi* phi = phi_it.Current()->AsPhi();
-        phi->ReplaceWith(phi->InputAt(1 - this_index));
-        successor->RemovePhi(phi);
-      }
-    } else {
-      for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-        phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+    // Remove this block's entries in the successor's phis. Skip exceptional
+    // successors because catch phi inputs do not correspond to predecessor
+    // blocks but throwing instructions. Their inputs will be updated in step (4).
+    if (!successor->IsCatchBlock()) {
+      if (successor->predecessors_.size() == 1u) {
+        // The successor has just one predecessor left. Replace phis with the only
+        // remaining input.
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          HPhi* phi = phi_it.Current()->AsPhi();
+          phi->ReplaceWith(phi->InputAt(1 - this_index));
+          successor->RemovePhi(phi);
+        }
+      } else {
+        for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+          phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+        }
       }
     }
   }
   successors_.clear();
 
+  // (4) Remove instructions and phis. Instructions should have no remaining uses
+  //     except in catch phis. If an instruction is used by a catch phi at `index`,
+  //     remove `index`-th input of all phis in the catch block since they are
+  //     guaranteed dead. Note that we may miss dead inputs this way but the
+  //     graph will always remain consistent.
+  for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* insn = it.Current();
+    while (insn->HasUses()) {
+      DCHECK(IsTryBlock());
+      HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst();
+      size_t use_index = use->GetIndex();
+      HBasicBlock* user_block = use->GetUser()->GetBlock();
+      DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock());
+      for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+        phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
+      }
+    }
+
+    RemoveInstruction(insn);
+  }
+  for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+    RemovePhi(it.Current()->AsPhi());
+  }
+
   // Disconnect from the dominator.
   dominator_->RemoveDominatedBlock(this);
   SetDominator(nullptr);
 
-  // Delete from the graph. The function safely deletes remaining instructions
-  // and updates the reverse post order.
-  graph_->DeleteDeadBlock(this);
+  // Delete from the graph, update reverse post order.
+  graph_->DeleteDeadEmptyBlock(this);
   SetGraph(nullptr);
 }
 
@@ -1475,7 +1638,7 @@
   other->predecessors_.clear();
 
   // Delete `other` from the graph. The function updates reverse post order.
-  graph_->DeleteDeadBlock(other);
+  graph_->DeleteDeadEmptyBlock(other);
   other->SetGraph(nullptr);
 }
 
@@ -1539,19 +1702,14 @@
   std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
 }
 
-void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
   DCHECK_EQ(block->GetGraph(), this);
   DCHECK(block->GetSuccessors().empty());
   DCHECK(block->GetPredecessors().empty());
   DCHECK(block->GetDominatedBlocks().empty());
   DCHECK(block->GetDominator() == nullptr);
-
-  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-    block->RemoveInstruction(it.Current());
-  }
-  for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-    block->RemovePhi(it.Current()->AsPhi());
-  }
+  DCHECK(block->GetInstructions().IsEmpty());
+  DCHECK(block->GetPhis().IsEmpty());
 
   if (block->IsExitBlock()) {
     exit_block_ = nullptr;
@@ -1654,6 +1812,9 @@
     // (2) the reverse post order of that graph,
     // (3) the potential loop information they are now in,
     // (4) try block membership.
+    // Note that we do not need to update catch phi inputs because they
+    // correspond to the register file of the outer method which the inlinee
+    // cannot modify.
 
     // We don't add the entry block, the exit block, and the first block, which
     // has been merged with `at`.
@@ -1782,7 +1943,7 @@
  *             |
  *          if_block
  *           /    \
- *  dummy_block   deopt_block
+ *  true_block   false_block
  *           \    /
  *       new_pre_header
  *             |
@@ -1790,62 +1951,73 @@
  */
 void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
   DCHECK(header->IsLoopHeader());
-  HBasicBlock* pre_header = header->GetDominator();
+  HBasicBlock* old_pre_header = header->GetDominator();
 
-  // Need this to avoid critical edge.
+  // Need an extra block to avoid a critical edge.
   HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  // Need this to avoid critical edge.
-  HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc());
   HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
   AddBlock(if_block);
-  AddBlock(dummy_block);
-  AddBlock(deopt_block);
+  AddBlock(true_block);
+  AddBlock(false_block);
   AddBlock(new_pre_header);
 
-  header->ReplacePredecessor(pre_header, new_pre_header);
-  pre_header->successors_.clear();
-  pre_header->dominated_blocks_.clear();
+  header->ReplacePredecessor(old_pre_header, new_pre_header);
+  old_pre_header->successors_.clear();
+  old_pre_header->dominated_blocks_.clear();
 
-  pre_header->AddSuccessor(if_block);
-  if_block->AddSuccessor(dummy_block);  // True successor
-  if_block->AddSuccessor(deopt_block);  // False successor
-  dummy_block->AddSuccessor(new_pre_header);
-  deopt_block->AddSuccessor(new_pre_header);
+  old_pre_header->AddSuccessor(if_block);
+  if_block->AddSuccessor(true_block);  // True successor
+  if_block->AddSuccessor(false_block);  // False successor
+  true_block->AddSuccessor(new_pre_header);
+  false_block->AddSuccessor(new_pre_header);
 
-  pre_header->dominated_blocks_.push_back(if_block);
-  if_block->SetDominator(pre_header);
-  if_block->dominated_blocks_.push_back(dummy_block);
-  dummy_block->SetDominator(if_block);
-  if_block->dominated_blocks_.push_back(deopt_block);
-  deopt_block->SetDominator(if_block);
+  old_pre_header->dominated_blocks_.push_back(if_block);
+  if_block->SetDominator(old_pre_header);
+  if_block->dominated_blocks_.push_back(true_block);
+  true_block->SetDominator(if_block);
+  if_block->dominated_blocks_.push_back(false_block);
+  false_block->SetDominator(if_block);
   if_block->dominated_blocks_.push_back(new_pre_header);
   new_pre_header->SetDominator(if_block);
   new_pre_header->dominated_blocks_.push_back(header);
   header->SetDominator(new_pre_header);
 
+  // Fix reverse post order.
   size_t index_of_header = IndexOfElement(reverse_post_order_, header);
   MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1);
   reverse_post_order_[index_of_header++] = if_block;
-  reverse_post_order_[index_of_header++] = dummy_block;
-  reverse_post_order_[index_of_header++] = deopt_block;
+  reverse_post_order_[index_of_header++] = true_block;
+  reverse_post_order_[index_of_header++] = false_block;
   reverse_post_order_[index_of_header++] = new_pre_header;
 
-  HLoopInformation* info = pre_header->GetLoopInformation();
-  if (info != nullptr) {
-    if_block->SetLoopInformation(info);
-    dummy_block->SetLoopInformation(info);
-    deopt_block->SetLoopInformation(info);
-    new_pre_header->SetLoopInformation(info);
-    for (HLoopInformationOutwardIterator loop_it(*pre_header);
+  // Fix loop information.
+  HLoopInformation* loop_info = old_pre_header->GetLoopInformation();
+  if (loop_info != nullptr) {
+    if_block->SetLoopInformation(loop_info);
+    true_block->SetLoopInformation(loop_info);
+    false_block->SetLoopInformation(loop_info);
+    new_pre_header->SetLoopInformation(loop_info);
+    // Add blocks to all enveloping loops.
+    for (HLoopInformationOutwardIterator loop_it(*old_pre_header);
          !loop_it.Done();
          loop_it.Advance()) {
       loop_it.Current()->Add(if_block);
-      loop_it.Current()->Add(dummy_block);
-      loop_it.Current()->Add(deopt_block);
+      loop_it.Current()->Add(true_block);
+      loop_it.Current()->Add(false_block);
       loop_it.Current()->Add(new_pre_header);
     }
   }
+
+  // Fix try/catch information.
+  TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock()
+      ? old_pre_header->GetTryCatchInformation()
+      : nullptr;
+  if_block->SetTryCatchInformation(try_catch_info);
+  true_block->SetTryCatchInformation(try_catch_info);
+  false_block->SetTryCatchInformation(try_catch_info);
+  new_pre_header->SetTryCatchInformation(try_catch_info);
 }
 
 void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
@@ -1940,6 +2112,60 @@
   return !opt.GetDoesNotNeedDexCache();
 }
 
+void HInvokeStaticOrDirect::InsertInputAt(size_t index, HInstruction* input) {
+  inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
+  input->AddUseAt(this, index);
+  // Update indexes in use nodes of inputs that have been pushed further back by the insert().
+  for (size_t i = index + 1u, size = inputs_.size(); i != size; ++i) {
+    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i - 1u);
+    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  }
+}
+
+void HInvokeStaticOrDirect::RemoveInputAt(size_t index) {
+  RemoveAsUserOfInput(index);
+  inputs_.erase(inputs_.begin() + index);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = InputCount(); i < e; ++i) {
+    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
+    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  }
+}
+
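The index fixups above keep cached use positions consistent across vector
mutation; a minimal sketch of the same bookkeeping (`UseNode` is a stand-in
for HUseListNode, which caches its own input index):

#include <cassert>
#include <cstddef>
#include <vector>

struct UseNode { size_t index; };

static void InsertAt(std::vector<UseNode>& inputs, size_t index) {
  inputs.insert(inputs.begin() + index, UseNode{index});
  // Records pushed further back by the insert() now cache a stale index.
  for (size_t i = index + 1; i != inputs.size(); ++i) {
    assert(inputs[i].index == i - 1);
    inputs[i].index = i;
  }
}

static void RemoveAt(std::vector<UseNode>& inputs, size_t index) {
  inputs.erase(inputs.begin() + index);
  // Records pulled forward by the erase() now cache a stale index.
  for (size_t i = index; i != inputs.size(); ++i) {
    assert(inputs[i].index == i + 1);
    inputs[i].index = i;
  }
}

int main() {
  std::vector<UseNode> inputs = {{0}, {1}, {2}};
  InsertAt(inputs, 1);
  for (size_t i = 0; i != inputs.size(); ++i) assert(inputs[i].index == i);
  RemoveAt(inputs, 1);
  for (size_t i = 0; i != inputs.size(); ++i) assert(inputs[i].index == i);
  return 0;
}
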
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
+      return os << "explicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit:
+      return os << "implicit";
+    case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
+      return os << "none";
+    default:
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 void HInstruction::RemoveEnvironmentUsers() {
   for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
     HUseListNode<HEnvironment*>* user_node = use_it.Current();
@@ -1949,4 +2175,46 @@
   env_uses_.Clear();
 }
 
+// Returns an instruction with the opposite boolean value from 'cond'.
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) {
+  ArenaAllocator* allocator = GetArena();
+
+  // Floating point conditions cannot simply be reversed (because of NaN), so
+  // for those we have to fall back to HBooleanNot below.
+  if (cond->IsCondition() &&
+      !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) {
+    HInstruction* lhs = cond->InputAt(0);
+    HInstruction* rhs = cond->InputAt(1);
+    HInstruction* replacement = nullptr;
+    switch (cond->AsCondition()->GetOppositeCondition()) {  // Get the *opposite* condition.
+      case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break;
+      case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break;
+      case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break;
+      case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break;
+      case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break;
+      case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break;
+      case kCondB:  replacement = new (allocator) HBelow(lhs, rhs); break;
+      case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break;
+      case kCondA:  replacement = new (allocator) HAbove(lhs, rhs); break;
+      case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break;
+      default:
+        LOG(FATAL) << "Unexpected condition";
+        UNREACHABLE();
+    }
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  } else if (cond->IsIntConstant()) {
+    HIntConstant* int_const = cond->AsIntConstant();
+    if (int_const->IsZero()) {
+      return GetIntConstant(1);
+    } else {
+      DCHECK(int_const->IsOne());
+      return GetIntConstant(0);
+    }
+  } else {
+    HInstruction* replacement = new (allocator) HBooleanNot(cond);
+    cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+    return replacement;
+  }
+}
+
 }  // namespace art
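
Why the floating point case cannot simply flip the comparison: with a NaN
operand a comparison and its "opposite" are both false, so only a boolean
negation (HBooleanNot) of the materialized result is correct. A small
demonstration:

#include <cassert>
#include <cmath>

int main() {
  double a = std::nan("");
  double b = 1.0;
  assert(!(a < b));   // The comparison is false because of NaN...
  assert(!(a >= b));  // ...but so is the "opposite" comparison.
  return 0;
}
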
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f2c1cf..19614f1 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -35,6 +35,7 @@
 #include "mirror/class.h"
 #include "offsets.h"
 #include "primitive.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -240,8 +241,9 @@
   // put deoptimization instructions, etc.
   void TransformLoopHeaderForBCE(HBasicBlock* header);
 
-  // Removes `block` from the graph.
-  void DeleteDeadBlock(HBasicBlock* block);
+  // Removes `block` from the graph. Assumes `block` has been disconnected from
+  // other blocks and has no instructions or phis.
+  void DeleteDeadEmptyBlock(HBasicBlock* block);
 
   // Splits the edge between `block` and `successor` while preserving the
   // indices in the predecessor/successor lists. If there are multiple edges
@@ -350,8 +352,6 @@
 
   HCurrentMethod* GetCurrentMethod();
 
-  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
-
   const DexFile& GetDexFile() const {
     return dex_file_;
   }
@@ -371,6 +371,11 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  // Returns an instruction with the opposite boolean value from 'cond'.
+  // The instruction has been inserted into the graph, either as a constant, or
+  // before cursor.
+  HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
+
  private:
   void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
@@ -661,6 +666,9 @@
     return successors_;
   }
 
+  ArrayRef<HBasicBlock* const> GetNormalSuccessors() const;
+  ArrayRef<HBasicBlock* const> GetExceptionalSuccessors() const;
+
   bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) {
     return ContainsElement(successors_, block, start_from);
   }
@@ -811,12 +819,6 @@
     return GetPredecessorIndexOf(predecessor) == idx;
   }
 
-  // Returns the number of non-exceptional successors. SsaChecker ensures that
-  // these are stored at the beginning of the successor list.
-  size_t NumberOfNormalSuccessors() const {
-    return EndsWithTryBoundary() ? 1 : GetSuccessors().size();
-  }
-
   // Create a new block between this block and its predecessors. The new block
   // is added to the graph, all predecessor edges are relinked to it and an edge
   // is created to `this`. Returns the new empty block. Reverse post order or
@@ -837,6 +839,15 @@
   // blocks are consistent (for example ending with a control flow instruction).
   HBasicBlock* SplitAfter(HInstruction* cursor);
 
+  // Split the catch block into two blocks after the original move-exception bytecode
+  // instruction, or at the beginning if it is not present. Returns the newly created
+  // latter block, or nullptr if no such block could be created (the catch block must
+  // be dead in that case). Note that this method just updates raw block information,
+  // like predecessors, successors, dominators, and instruction list. It does not
+  // update the graph, reverse post order, loop information, nor make sure the
+  // blocks are consistent (for example ending with a control flow instruction).
+  HBasicBlock* SplitCatchBlockAfterMoveException();
+
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
   // that this method does not update the graph, reverse post order, loop
@@ -1084,13 +1095,20 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
-  M(Arm64IntermediateAddress, Instruction)
+  M(Arm64DataProcWithShifterOp, Instruction)                            \
+  M(Arm64IntermediateAddress, Instruction)                              \
+  M(Arm64MultiplyAccumulate, Instruction)
 #endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1430,7 +1448,7 @@
     return flags_ == (kAllChangeBits | kAllDependOnBits);
   }
 
-  // Returns true if this may read something written by other.
+  // Returns true if `this` may read something written by `other`.
   bool MayDependOn(SideEffects other) const {
     const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits;
     return (other.flags_ & depends_on_flags);
@@ -1620,6 +1638,11 @@
     return holder_;
   }
 
+  bool IsFromInlinedInvoke() const {
+    return GetParent() != nullptr;
+  }
+
  private:
   // Record instructions' use entries of this environment for constant-time removal.
   // It should only be called by HInstruction when a new environment use is added.
@@ -1725,6 +1748,13 @@
     return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
   }
 
+  bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    DCHECK(IsValid());
+    DCHECK(rti.IsValid());
+    return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+        GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+  }
+
   // Returns true if the type information provide the same amount of details.
   // Note that it does not mean that the instructions have the same actual type
   // (because the type can be the result of a merge).
@@ -1927,6 +1957,14 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -2390,6 +2428,10 @@
   // Returns the block's non-exceptional successor (index zero).
   HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
 
+  ArrayRef<HBasicBlock* const> GetExceptionHandlers() const {
+    return ArrayRef<HBasicBlock* const>(GetBlock()->GetSuccessors()).SubArray(1u);
+  }
+
   // Returns whether `handler` is among its exception handlers (non-zero index
   // successors).
   bool HasExceptionHandler(const HBasicBlock& handler) const {
@@ -2417,25 +2459,6 @@
   DISALLOW_COPY_AND_ASSIGN(HTryBoundary);
 };
 
-// Iterator over exception handlers of a given HTryBoundary, i.e. over
-// exceptional successors of its basic block.
-class HExceptionHandlerIterator : public ValueObject {
- public:
-  explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary)
-    : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
-
-  bool Done() const { return index_ == block_.GetSuccessors().size(); }
-  HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; }
-  size_t CurrentSuccessorIndex() const { return index_; }
-  void Advance() { ++index_; }
-
- private:
-  const HBasicBlock& block_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator);
-};
-
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
@@ -2604,6 +2627,11 @@
     VLOG(compiler) << DebugName() << " is not defined for the (long, int) case.";
     return nullptr;
   }
+  virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                              HNullConstant* y ATTRIBUTE_UNUSED) const {
+    VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
+    return nullptr;
+  }
 
   // Returns an input that can legally be used as the right input and is
   // constant, or null.
@@ -2694,6 +2722,10 @@
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                      HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(1);
+  }
 
   DECLARE_INSTRUCTION(Equal);
 
@@ -2726,6 +2758,10 @@
     return GetBlock()->GetGraph()->GetIntConstant(
         Compute(x->GetValue(), y->GetValue()), GetDexPc());
   }
+  HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+                      HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(0);
+  }
 
   DECLARE_INSTRUCTION(NotEqual);
 
@@ -3227,7 +3263,7 @@
   void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
 
   bool IsFromInlinedInvoke() const {
-    return GetEnvironment()->GetParent() != nullptr;
+    return GetEnvironment()->IsFromInlinedInvoke();
   }
 
   bool CanThrow() const OVERRIDE { return true; }
@@ -3395,28 +3431,48 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
-                // There is one extra argument for the HCurrentMethod node, and
-                // potentially one other if the clinit check is explicit, and one other
-                // if the method is a string factory.
-                1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u)
-                   + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                // There is potentially one extra argument for the HCurrentMethod node, and
+                // potentially one other if the clinit check is explicit, and potentially
+                // one other if the method is a string factory.
+                (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
+                    (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
+        optimized_invoke_type_(optimized_invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
-        dispatch_info_(dispatch_info) {}
+        dispatch_info_(dispatch_info) { }
 
   void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+    bool had_current_method_input = HasCurrentMethodInput();
+    bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+
+    // Using the current method is the default and once we find a better
+    // method load kind, we should not go back to using the current method.
+    DCHECK(had_current_method_input || !needs_current_method_input);
+
+    if (had_current_method_input && !needs_current_method_input) {
+      DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+      RemoveInputAt(GetSpecialInputIndex());
+    }
     dispatch_info_ = dispatch_info;
   }
 
+  void AddSpecialInput(HInstruction* input) {
+    // We allow only one special input.
+    DCHECK(!IsStringInit() && !HasCurrentMethodInput());
+    DCHECK(InputCount() == GetSpecialInputIndex() ||
+           (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
+    InsertInputAt(GetSpecialInputIndex(), input);
+  }
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
     // TODO: for intrinsics we can generate implicit null checks.
@@ -3427,19 +3483,42 @@
     return return_type_ == Primitive::kPrimNot && !IsStringInit();
   }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  // Get the index of the special input, if any.
+  //
+  // If the invoke IsStringInit(), it initially has an HFakeString special argument
+  // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
+  // the "special input" is the current method pointer; otherwise there may be one
+  // platform-specific special input, such as a PC-relative addressing base.
+  uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+
+  InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    optimized_invoke_type_ = invoke_type;
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
-  uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
-  bool HasPcRelDexCache() const {
+  bool HasPcRelativeDexCache() const {
     return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
   }
+  bool HasCurrentMethodInput() const {
+    // This function can be called only after the invoke has been fully initialized by the builder.
+    if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
+      DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+      return true;
+    } else {
+      DCHECK(InputCount() == GetSpecialInputIndex() ||
+             !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+      return false;
+    }
+  }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3452,7 +3531,7 @@
   }
 
   uint32_t GetDexCacheArrayOffset() const {
-    DCHECK(HasPcRelDexCache());
+    DCHECK(HasPcRelativeDexCache());
     return dispatch_info_.method_load_data;
   }
 
@@ -3465,29 +3544,28 @@
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
-  // Remove the art::HLoadClass instruction set as last input by
-  // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
-  // the initial art::HClinitCheck instruction (only relevant for
-  // static calls with explicit clinit check).
-  void RemoveLoadClassAsLastInput() {
+  // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
+  // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
+  // instruction; only relevant for static calls with explicit clinit check.
+  void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
     DCHECK(IsStaticWithExplicitClinitCheck());
     size_t last_input_index = InputCount() - 1;
     HInstruction* last_input = InputAt(last_input_index);
     DCHECK(last_input != nullptr);
-    DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+    DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
     RemoveAsUserOfInput(last_input_index);
     inputs_.pop_back();
-    clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
-    DCHECK(IsStaticWithImplicitClinitCheck());
+    clinit_check_requirement_ = new_requirement;
+    DCHECK(!IsStaticWithExplicitClinitCheck());
   }
 
   bool IsStringFactoryFor(HFakeString* str) const {
     if (!IsStringInit()) return false;
-    // +1 for the current method.
-    if (InputCount() == (number_of_arguments_ + 1)) return false;
+    DCHECK(!HasCurrentMethodInput());
+    if (InputCount() == number_of_arguments_) return false;
     return InputAt(InputCount() - 1)->AsFakeString() == str;
   }
 
@@ -3502,7 +3580,7 @@
   }
 
   // Is this a call to a static method whose declaring class has an
-  // explicit intialization check in the graph?
+  // explicit initialization check in the graph?
   bool IsStaticWithExplicitClinitCheck() const {
     return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
   }
@@ -3513,6 +3591,11 @@
     return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
   }
 
+  // Does this method load kind need the current method as an input?
+  static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
+    return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
+  }
+
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  protected:
@@ -3530,8 +3613,11 @@
     return input_record;
   }
 
+  void InsertInputAt(size_t index, HInstruction* input);
+  void RemoveInputAt(size_t index);
+
  private:
-  const InvokeType invoke_type_;
+  InvokeType optimized_invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3541,6 +3627,8 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
  public:
@@ -3595,18 +3683,24 @@
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
 };
 
-class HNewInstance : public HExpression<1> {
+class HNewInstance : public HExpression<2> {
  public:
-  HNewInstance(HCurrentMethod* current_method,
+  HNewInstance(HInstruction* cls,
+               HCurrentMethod* current_method,
                uint32_t dex_pc,
                uint16_t type_index,
                const DexFile& dex_file,
+               bool can_throw,
+               bool finalizable,
                QuickEntrypointEnum entrypoint)
       : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
+        can_throw_(can_throw),
+        finalizable_(finalizable),
         entrypoint_(entrypoint) {
-    SetRawInputAt(0, current_method);
+    SetRawInputAt(0, cls);
+    SetRawInputAt(1, current_method);
   }
 
   uint16_t GetTypeIndex() const { return type_index_; }
@@ -3614,22 +3708,30 @@
 
   // Calls runtime so needs an environment.
   bool NeedsEnvironment() const OVERRIDE { return true; }
-  // It may throw when called on:
-  //   - interfaces
-  //   - abstract/innaccessible/unknown classes
-  // TODO: optimize when possible.
-  bool CanThrow() const OVERRIDE { return true; }
+
+  // It may throw when called on a type that is not instantiable/accessible,
+  // and it can always throw OOME.
+  // TODO: distinguish between the two cases so we can for example allow allocation elimination.
+  bool CanThrow() const OVERRIDE { return true; }  // Conservatively true for now; see the TODO.
+
+  bool IsFinalizable() const { return finalizable_; }
 
   bool CanBeNull() const OVERRIDE { return false; }
 
   QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
 
+  void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+    entrypoint_ = entrypoint;
+  }
+
   DECLARE_INSTRUCTION(NewInstance);
 
  private:
   const uint16_t type_index_;
   const DexFile& dex_file_;
-  const QuickEntrypointEnum entrypoint_;
+  const bool can_throw_;
+  const bool finalizable_;
+  QuickEntrypointEnum entrypoint_;
 
   DISALLOW_COPY_AND_ASSIGN(HNewInstance);
 };
@@ -4237,9 +4339,13 @@
       : HInstruction(SideEffects::None(), dex_pc),
         inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
         reg_number_(reg_number),
-        type_(type),
-        is_live_(false),
+        type_(ToPhiType(type)),
+        // Phis are constructed live and marked dead if conflicting or unused.
+        // Individual steps of SsaBuilder should assume that if a phi has been
+        // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+        is_live_(true),
         can_be_null_(true) {
+    DCHECK_NE(type_, Primitive::kPrimVoid);
   }
 
   // Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
@@ -4710,13 +4816,15 @@
              const DexFile& dex_file,
              bool is_referrers_class,
              uint32_t dex_pc,
-             bool needs_access_check)
+             bool needs_access_check,
+             bool is_in_dex_cache)
       : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
         type_index_(type_index),
         dex_file_(dex_file),
         is_referrers_class_(is_referrers_class),
         generate_clinit_check_(false),
         needs_access_check_(needs_access_check),
+        is_in_dex_cache_(is_in_dex_cache),
         loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
     // Referrers class should not need access check. We never inline unverified
     // methods so we can't possibly end up in this situation.
@@ -4741,14 +4849,13 @@
   bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
-    // Will call runtime and load the class if the class is not loaded yet.
-    // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
   bool MustGenerateClinitCheck() const {
     return generate_clinit_check_;
   }
+
   void SetMustGenerateClinitCheck(bool generate_clinit_check) {
     // The entrypoint the code generator is going to call does not do
     // clinit of the class.
@@ -4757,7 +4864,9 @@
   }
 
   bool CanCallRuntime() const {
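+    // Only the referrer's own class or a class already in the dex cache can be
+    // loaded without a runtime call, and only if no access or clinit check is needed.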
-    return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_;
+    return MustGenerateClinitCheck() ||
+           (!is_referrers_class_ && !is_in_dex_cache_) ||
+           needs_access_check_;
   }
 
   bool NeedsAccessCheck() const {
@@ -4765,8 +4874,6 @@
   }
 
   bool CanThrow() const OVERRIDE {
-    // May call runtime and and therefore can throw.
-    // TODO: finer grain decision.
     return CanCallRuntime();
   }
 
@@ -4788,6 +4895,8 @@
     return SideEffects::CanTriggerGC();
   }
 
+  bool IsInDexCache() const { return is_in_dex_cache_; }
+
   DECLARE_INSTRUCTION(LoadClass);
 
  private:
@@ -4797,7 +4906,8 @@
   // Whether this instruction must generate the initialization check.
   // Used for code generation.
   bool generate_clinit_check_;
-  bool needs_access_check_;
+  const bool needs_access_check_;
+  const bool is_in_dex_cache_;
 
   ReferenceTypeInfo loaded_class_rti_;
 
@@ -4862,6 +4972,7 @@
     return true;
   }
 
+  bool CanThrow() const OVERRIDE { return true; }
 
   HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
 
@@ -5474,6 +5585,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
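+    // For example, if offsets 40, 16 and 32 are requested (in any order), the
+    // base settles at 16 and the loads use offsets 24, 0 and 16 from it.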
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc
new file mode 100644
index 0000000..ac2f093
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_arm64.h"
+#include "nodes.h"
+
+namespace art {
+
+using arm64::helpers::CanFitInShifterOperand;
+
+void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+                                                           /*out*/OpKind* op_kind,
+                                                           /*out*/int* shift_amount) {
+  DCHECK(CanFitInShifterOperand(instruction));
+  if (instruction->IsShl()) {
+    *op_kind = kLSL;
+    *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsShr()) {
+    *op_kind = kASR;
+    *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsUShr()) {
+    *op_kind = kLSR;
+    *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instruction->IsTypeConversion());
+    Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType();
+    Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType();
+    int result_size = Primitive::ComponentSize(result_type);
+    int input_size = Primitive::ComponentSize(input_type);
+    int min_size = std::min(result_size, input_size);
+    // This follows the logic in
+    // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+      // There is actually nothing to do. The register will be used as a W
+      // register, discarding the top bits. This is represented by the default
+      // encoding 'LSL 0'.
+      *op_kind = kLSL;
+      *shift_amount = 0;
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
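+      // Java `char` is the only unsigned type, so conversions to `char`, or from
+      // `char` to a wider type, zero-extend through 16 bits.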
+      *op_kind = kUXTH;
+    } else {
+      switch (min_size) {
+        case 1: *op_kind = kSXTB; break;
+        case 2: *op_kind = kSXTH; break;
+        case 4: *op_kind = kSXTW; break;
+        default:
+          LOG(FATAL) << "Unexpected min size " << min_size;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+  switch (op) {
+    case HArm64DataProcWithShifterOp::kLSL:  return os << "LSL";
+    case HArm64DataProcWithShifterOp::kLSR:  return os << "LSR";
+    case HArm64DataProcWithShifterOp::kASR:  return os << "ASR";
+    case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
+    case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
+    case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
+    case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
+    case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
+    case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+    default:
+      LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 885d3a2..e843935 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -19,6 +19,79 @@
 
 namespace art {
 
+class HArm64DataProcWithShifterOp : public HExpression<2> {
+ public:
+  enum OpKind {
+    kLSL,   // Logical shift left.
+    kLSR,   // Logical shift right.
+    kASR,   // Arithmetic shift right.
+    kUXTB,  // Unsigned extend byte.
+    kUXTH,  // Unsigned extend half-word.
+    kUXTW,  // Unsigned extend word.
+    kSXTB,  // Signed extend byte.
+    kSXTH,  // Signed extend half-word.
+    kSXTW,  // Signed extend word.
+
+    // Aliases.
+    kFirstShiftOp = kLSL,
+    kLastShiftOp = kASR,
+    kFirstExtensionOp = kUXTB,
+    kLastExtensionOp = kSXTW
+  };
+  HArm64DataProcWithShifterOp(HInstruction* instr,
+                              HInstruction* left,
+                              HInstruction* right,
+                              OpKind op,
+                              // The shift argument is unused if the operation
+                              // is an extension.
+                              int shift = 0,
+                              uint32_t dex_pc = kNoDexPc)
+      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+        instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
+    DCHECK(!instr->HasSideEffects());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
+    HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+    return instr_kind_ == other->instr_kind_ &&
+        op_kind_ == other->op_kind_ &&
+        shift_amount_ == other->shift_amount_;
+  }
+
+  static bool IsShiftOp(OpKind op_kind) {
+    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+  }
+
+  static bool IsExtensionOp(OpKind op_kind) {
+    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+  }
+
+  // Find the operation kind and shift amount from a bitfield move instruction.
+  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+                                       /*out*/OpKind* op_kind,
+                                       /*out*/int* shift_amount);
+
+  InstructionKind GetInstrKind() const { return instr_kind_; }
+  OpKind GetOpKind() const { return op_kind_; }
+  int GetShiftAmount() const { return shift_amount_; }
+
+  DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
+
+ private:
+  InstructionKind instr_kind_;
+  OpKind op_kind_;
+  int shift_amount_;
+
+  friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
+
 // This instruction computes an intermediate address pointing in the 'middle' of an object. The
 // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
 // never used across anything that can trigger GC.
@@ -42,6 +115,40 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
+class HArm64MultiplyAccumulate : public HExpression<3> {
+ public:
+  HArm64MultiplyAccumulate(Primitive::Type type,
+                           InstructionKind op,
+                           HInstruction* accumulator,
+                           HInstruction* mul_left,
+                           HInstruction* mul_right,
+                           uint32_t dex_pc = kNoDexPc)
+      : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+    SetRawInputAt(kInputAccumulatorIndex, accumulator);
+    SetRawInputAt(kInputMulLeftIndex, mul_left);
+    SetRawInputAt(kInputMulRightIndex, mul_right);
+  }
+
+  static constexpr int kInputAccumulatorIndex = 0;
+  static constexpr int kInputMulLeftIndex = 1;
+  static constexpr int kInputMulRightIndex = 2;
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+
+ private:
+  // Indicates if this is a MADD or MSUB.
+  InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 05c6b2c..2b0d522 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -23,6 +23,9 @@
 #include "optimizing/code_generator.h"
 #include "optimizing/optimizing_unit_test.h"
 #include "utils/assembler.h"
+#include "utils/arm/assembler_thumb2.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/mips64/assembler_mips64.h"
 
 #include "optimizing/optimizing_cfi_test_expected.inc"
 
@@ -36,52 +39,62 @@
   // Enable this flag to generate the expected outputs.
   static constexpr bool kGenerateExpected = false;
 
-  void TestImpl(InstructionSet isa, const char* isa_str,
-                const std::vector<uint8_t>& expected_asm,
-                const std::vector<uint8_t>& expected_cfi) {
+  OptimizingCFITest()
+      : pool_(),
+        allocator_(&pool_),
+        opts_(),
+        isa_features_(),
+        graph_(nullptr),
+        code_gen_(),
+        blocks_(allocator_.Adapter()) {}
+
+  void SetUpFrame(InstructionSet isa) {
     // Setup simple context.
-    ArenaPool pool;
-    ArenaAllocator allocator(&pool);
-    CompilerOptions opts;
-    std::unique_ptr<const InstructionSetFeatures> isa_features;
     std::string error;
-    isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    HGraph* graph = CreateGraph(&allocator);
+    isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    graph_ = CreateGraph(&allocator_);
     // Generate simple frame with some spills.
-    std::unique_ptr<CodeGenerator> code_gen(
-        CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
+    code_gen_.reset(CodeGenerator::Create(graph_, isa, *isa_features_, opts_));
+    code_gen_->GetAssembler()->cfi().SetEnabled(true);
     const int frame_size = 64;
     int core_reg = 0;
     int fp_reg = 0;
     for (int i = 0; i < 2; i++) {  // Two registers of each kind.
       for (; core_reg < 32; core_reg++) {
-        if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+        if (code_gen_->IsCoreCalleeSaveRegister(core_reg)) {
           auto location = Location::RegisterLocation(core_reg);
-          code_gen->AddAllocatedRegister(location);
+          code_gen_->AddAllocatedRegister(location);
           core_reg++;
           break;
         }
       }
       for (; fp_reg < 32; fp_reg++) {
-        if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+        if (code_gen_->IsFloatingPointCalleeSaveRegister(fp_reg)) {
           auto location = Location::FpuRegisterLocation(fp_reg);
-          code_gen->AddAllocatedRegister(location);
+          code_gen_->AddAllocatedRegister(location);
           fp_reg++;
           break;
         }
       }
     }
-    ArenaVector<HBasicBlock*> blocks(allocator.Adapter());
-    code_gen->block_order_ = &blocks;
-    code_gen->ComputeSpillMask();
-    code_gen->SetFrameSize(frame_size);
-    code_gen->GenerateFrameEntry();
-    code_gen->GenerateFrameExit();
+    code_gen_->block_order_ = &blocks_;
+    code_gen_->ComputeSpillMask();
+    code_gen_->SetFrameSize(frame_size);
+    code_gen_->GenerateFrameEntry();
+  }
+
+  void Finish() {
+    code_gen_->GenerateFrameExit();
+    code_gen_->Finalize(&code_allocator_);
+  }
+
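+  // Frame setup, code emission and checking are split into SetUpFrame(),
+  // Finish() and Check() so that tests can emit extra instructions between
+  // the frame entry and the frame exit (see the *Adjust tests below).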
+  void Check(InstructionSet isa,
+             const char* isa_str,
+             const std::vector<uint8_t>& expected_asm,
+             const std::vector<uint8_t>& expected_cfi) {
     // Get the outputs.
-    InternalCodeAllocator code_allocator;
-    code_gen->Finalize(&code_allocator);
-    const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
-    Assembler* opt_asm = code_gen->GetAssembler();
+    const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+    Assembler* opt_asm = code_gen_->GetAssembler();
     const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
 
     if (kGenerateExpected) {
@@ -92,6 +105,19 @@
     }
   }
 
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
+                const std::vector<uint8_t>& expected_asm,
+                const std::vector<uint8_t>& expected_cfi) {
+    SetUpFrame(isa);
+    Finish();
+    Check(isa, isa_str, expected_asm, expected_cfi);
+  }
+
+  CodeGenerator* GetCodeGenerator() {
+    return code_gen_.get();
+  }
+
  private:
   class InternalCodeAllocator : public CodeAllocator {
    public:
@@ -109,21 +135,111 @@
 
     DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
   };
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  CompilerOptions opts_;
+  std::unique_ptr<const InstructionSetFeatures> isa_features_;
+  HGraph* graph_;
+  std::unique_ptr<CodeGenerator> code_gen_;
+  ArenaVector<HBasicBlock*> blocks_;
+  InternalCodeAllocator code_allocator_;
 };
 
-#define TEST_ISA(isa) \
-  TEST_F(OptimizingCFITest, isa) { \
-    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
-        expected_asm_##isa + arraysize(expected_asm_##isa)); \
-    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
-        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
-    TestImpl(isa, #isa, expected_asm, expected_cfi); \
+#define TEST_ISA(isa)                                         \
+  TEST_F(OptimizingCFITest, isa) {                            \
+    std::vector<uint8_t> expected_asm(                        \
+        expected_asm_##isa,                                   \
+        expected_asm_##isa + arraysize(expected_asm_##isa));  \
+    std::vector<uint8_t> expected_cfi(                        \
+        expected_cfi_##isa,                                   \
+        expected_cfi_##isa + arraysize(expected_cfi_##isa));  \
+    TestImpl(isa, #isa, expected_asm, expected_cfi);          \
   }
 
 TEST_ISA(kThumb2)
 TEST_ISA(kArm64)
 TEST_ISA(kX86)
 TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
+TEST_F(OptimizingCFITest, kThumb2Adjust) {
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kThumb2_adjust,
+      expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kThumb2_adjust,
+      expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust));
+  SetUpFrame(kThumb2);
+#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
+  Label target;
+  __ CompareAndBranchIfZero(arm::R0, &target);
+  // Push the target out of range of CBZ.
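+  // (CBZ can branch forward at most 126 bytes; 65 two-byte loads exceed that,
+  // so the assembler relaxes it to CMP + BEQ, as the expected output shows.)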
+  for (size_t i = 0; i != 65; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
+}
+
+TEST_F(OptimizingCFITest, kMipsAdjust) {
+  // One NOP in the delay slot; 1 << 15 NOPs take 1 << 17 bytes, exceeding the 18-bit signed range.
+  static constexpr size_t kNumNops = 1u + (1u << 15);
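+  // Each NOP is 4 bytes, so the NOPs alone span 1 << 17 bytes and BEQZ cannot
+  // reach the target; the assembler must emit a long branch sequence instead,
+  // temporarily adjusting SP as the expected CFI output below shows.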
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kMips_adjust_head,
+      expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head));
+  expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+  expected_asm.insert(
+      expected_asm.end(),
+      expected_asm_kMips_adjust_tail,
+      expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kMips_adjust,
+      expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust));
+  SetUpFrame(kMips);
+#define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())->
+  mips::MipsLabel target;
+  __ Beqz(mips::A0, &target);
+  // Push the target out of range of BEQZ.
+  for (size_t i = 0; i != kNumNops; ++i) {
+    __ Nop();
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
+}
+
+TEST_F(OptimizingCFITest, kMips64Adjust) {
+  // One NOP in the forbidden slot; 1 << 15 NOPs take 1 << 17 bytes, exceeding the 18-bit signed range.
+  static constexpr size_t kNumNops = 1u + (1u << 15);
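+  // Same reasoning as in kMipsAdjust: the NOPs put the target beyond BEQC's
+  // 18-bit range, so the assembler emits a long branch (AUIPC + JIC) instead.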
+  std::vector<uint8_t> expected_asm(
+      expected_asm_kMips64_adjust_head,
+      expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head));
+  expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+  expected_asm.insert(
+      expected_asm.end(),
+      expected_asm_kMips64_adjust_tail,
+      expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail));
+  std::vector<uint8_t> expected_cfi(
+      expected_cfi_kMips64_adjust,
+      expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust));
+  SetUpFrame(kMips64);
+#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())->
+  mips64::Mips64Label target;
+  __ Beqc(mips64::A1, mips64::A2, &target);
+  // Push the target out of range of BEQC.
+  for (size_t i = 0; i != kNumNops; ++i) {
+    __ Nop();
+  }
+  __ Bind(&target);
+#undef __
+  Finish();
+  Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
+}
 
 #endif  // __ANDROID__
 
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2c2c55f..de85729 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -138,3 +138,332 @@
 // 0x0000002c: ret
 // 0x0000002d: .cfi_restore_state
 // 0x0000002d: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips[] = {
+    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
+    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
+    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+    0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7,
+    0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7,
+    0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F,
+    0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1,
+    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -28
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: sw r31, +24(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +20(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: swc1 f22, +8(r29)
+// 0x00000014: swc1 f23, +12(r29)
+// 0x00000018: swc1 f20, +0(r29)
+// 0x0000001c: swc1 f21, +4(r29)
+// 0x00000020: addiu r29, r29, -36
+// 0x00000024: .cfi_def_cfa_offset: 64
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: .cfi_remember_state
+// 0x00000028: addiu r29, r29, 36
+// 0x0000002c: .cfi_def_cfa_offset: 28
+// 0x0000002c: lwc1 f20, +0(r29)
+// 0x00000030: lwc1 f21, +4(r29)
+// 0x00000034: lwc1 f22, +8(r29)
+// 0x00000038: lwc1 f23, +12(r29)
+// 0x0000003c: lw r16, +16(r29)
+// 0x00000040: .cfi_restore: r16
+// 0x00000040: lw r17, +20(r29)
+// 0x00000044: .cfi_restore: r17
+// 0x00000044: lw r31, +24(r29)
+// 0x00000048: .cfi_restore: r31
+// 0x00000048: addiu r29, r29, 28
+// 0x0000004c: .cfi_def_cfa_offset: 0
+// 0x0000004c: jr r31
+// 0x00000050: nop
+// 0x00000054: .cfi_restore_state
+// 0x00000054: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67,
+    0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
+    0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
+    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1,
+    0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: .cfi_remember_state
+// 0x00000020: daddiu r29, r29, 24
+// 0x00000024: .cfi_def_cfa_offset: 40
+// 0x00000024: ldc1 f24, +0(r29)
+// 0x00000028: ldc1 f25, +8(r29)
+// 0x0000002c: ld r16, +16(r29)
+// 0x00000030: .cfi_restore: r16
+// 0x00000030: ld r17, +24(r29)
+// 0x00000034: .cfi_restore: r17
+// 0x00000034: ld r31, +32(r29)
+// 0x00000038: .cfi_restore: r31
+// 0x00000038: daddiu r29, r29, 40
+// 0x0000003c: .cfi_def_cfa_offset: 0
+// 0x0000003c: jr r31
+// 0x00000040: nop
+// 0x00000044: .cfi_restore_state
+// 0x00000044: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28,
+    0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+    0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
+    0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+    0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
+    0x0E, 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: cmp r0, #0
+// 0x0000000c: beq +128 (0x00000090)
+// 0x0000000e: ldr r0, [r0, #0]
+// 0x00000010: ldr r0, [r0, #0]
+// 0x00000012: ldr r0, [r0, #0]
+// 0x00000014: ldr r0, [r0, #0]
+// 0x00000016: ldr r0, [r0, #0]
+// 0x00000018: ldr r0, [r0, #0]
+// 0x0000001a: ldr r0, [r0, #0]
+// 0x0000001c: ldr r0, [r0, #0]
+// 0x0000001e: ldr r0, [r0, #0]
+// 0x00000020: ldr r0, [r0, #0]
+// 0x00000022: ldr r0, [r0, #0]
+// 0x00000024: ldr r0, [r0, #0]
+// 0x00000026: ldr r0, [r0, #0]
+// 0x00000028: ldr r0, [r0, #0]
+// 0x0000002a: ldr r0, [r0, #0]
+// 0x0000002c: ldr r0, [r0, #0]
+// 0x0000002e: ldr r0, [r0, #0]
+// 0x00000030: ldr r0, [r0, #0]
+// 0x00000032: ldr r0, [r0, #0]
+// 0x00000034: ldr r0, [r0, #0]
+// 0x00000036: ldr r0, [r0, #0]
+// 0x00000038: ldr r0, [r0, #0]
+// 0x0000003a: ldr r0, [r0, #0]
+// 0x0000003c: ldr r0, [r0, #0]
+// 0x0000003e: ldr r0, [r0, #0]
+// 0x00000040: ldr r0, [r0, #0]
+// 0x00000042: ldr r0, [r0, #0]
+// 0x00000044: ldr r0, [r0, #0]
+// 0x00000046: ldr r0, [r0, #0]
+// 0x00000048: ldr r0, [r0, #0]
+// 0x0000004a: ldr r0, [r0, #0]
+// 0x0000004c: ldr r0, [r0, #0]
+// 0x0000004e: ldr r0, [r0, #0]
+// 0x00000050: ldr r0, [r0, #0]
+// 0x00000052: ldr r0, [r0, #0]
+// 0x00000054: ldr r0, [r0, #0]
+// 0x00000056: ldr r0, [r0, #0]
+// 0x00000058: ldr r0, [r0, #0]
+// 0x0000005a: ldr r0, [r0, #0]
+// 0x0000005c: ldr r0, [r0, #0]
+// 0x0000005e: ldr r0, [r0, #0]
+// 0x00000060: ldr r0, [r0, #0]
+// 0x00000062: ldr r0, [r0, #0]
+// 0x00000064: ldr r0, [r0, #0]
+// 0x00000066: ldr r0, [r0, #0]
+// 0x00000068: ldr r0, [r0, #0]
+// 0x0000006a: ldr r0, [r0, #0]
+// 0x0000006c: ldr r0, [r0, #0]
+// 0x0000006e: ldr r0, [r0, #0]
+// 0x00000070: ldr r0, [r0, #0]
+// 0x00000072: ldr r0, [r0, #0]
+// 0x00000074: ldr r0, [r0, #0]
+// 0x00000076: ldr r0, [r0, #0]
+// 0x00000078: ldr r0, [r0, #0]
+// 0x0000007a: ldr r0, [r0, #0]
+// 0x0000007c: ldr r0, [r0, #0]
+// 0x0000007e: ldr r0, [r0, #0]
+// 0x00000080: ldr r0, [r0, #0]
+// 0x00000082: ldr r0, [r0, #0]
+// 0x00000084: ldr r0, [r0, #0]
+// 0x00000086: ldr r0, [r0, #0]
+// 0x00000088: ldr r0, [r0, #0]
+// 0x0000008a: ldr r0, [r0, #0]
+// 0x0000008c: ldr r0, [r0, #0]
+// 0x0000008e: ldr r0, [r0, #0]
+// 0x00000090: .cfi_remember_state
+// 0x00000090: add sp, sp, #44
+// 0x00000092: .cfi_def_cfa_offset: 20
+// 0x00000092: vpop.f32 {s16-s17}
+// 0x00000096: .cfi_def_cfa_offset: 12
+// 0x00000096: .cfi_restore_extended: r80
+// 0x00000096: .cfi_restore_extended: r81
+// 0x00000096: pop {r5, r6, pc}
+// 0x00000098: .cfi_restore_state
+// 0x00000098: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
+    0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
+    0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
+    0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+    0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+    0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
+    0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
+    0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
+};
+static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
+    0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7,
+    0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F,
+    0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27,
+    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips_adjust[] = {
+    0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+    0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00,
+    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
+    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -28
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: sw r31, +24(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +20(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: swc1 f22, +8(r29)
+// 0x00000014: swc1 f23, +12(r29)
+// 0x00000018: swc1 f20, +0(r29)
+// 0x0000001c: swc1 f21, +4(r29)
+// 0x00000020: addiu r29, r29, -36
+// 0x00000024: .cfi_def_cfa_offset: 64
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: bne r0, r4, 0x0000004c ; +36
+// 0x0000002c: addiu r29, r29, -4
+// 0x00000030: .cfi_def_cfa_offset: 68
+// 0x00000030: sw r31, +0(r29)
+// 0x00000034: bltzal r0, 0x00000038 ; +4
+// 0x00000038: lui r1, 0x20000
+// 0x0000003c: ori r1, r1, 24
+// 0x00000040: addu r1, r1, r31
+// 0x00000044: lw r31, +0(r29)
+// 0x00000048: jr r1
+// 0x0000004c: addiu r29, r29, 4
+// 0x00000050: .cfi_def_cfa_offset: 64
+// 0x00000050: nop
+//             ...
+// 0x00020050: nop
+// 0x00020054: .cfi_remember_state
+// 0x00020054: addiu r29, r29, 36
+// 0x00020058: .cfi_def_cfa_offset: 28
+// 0x00020058: lwc1 f20, +0(r29)
+// 0x0002005c: lwc1 f21, +4(r29)
+// 0x00020060: lwc1 f22, +8(r29)
+// 0x00020064: lwc1 f23, +12(r29)
+// 0x00020068: lw r16, +16(r29)
+// 0x0002006c: .cfi_restore: r16
+// 0x0002006c: lw r17, +20(r29)
+// 0x00020070: .cfi_restore: r17
+// 0x00020070: lw r31, +24(r29)
+// 0x00020074: .cfi_restore: r31
+// 0x00020074: addiu r29, r29, 28
+// 0x00020078: .cfi_def_cfa_offset: 0
+// 0x00020078: jr r31
+// 0x0002007c: nop
+// 0x00020080: .cfi_restore_state
+// 0x00020080: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
+    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+    0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+};
+static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
+    0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
+    0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
+    0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
+    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28,
+    0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: bnec r5, r6, 0x0000002c ; +12
+// 0x00000024: auipc r1, 2
+// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x0000002c: nop
+//             ...
+// 0x0002002c: nop
+// 0x00020030: .cfi_remember_state
+// 0x00020030: daddiu r29, r29, 24
+// 0x00020034: .cfi_def_cfa_offset: 40
+// 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: ld r16, +16(r29)
+// 0x00020040: .cfi_restore: r16
+// 0x00020040: ld r17, +24(r29)
+// 0x00020044: .cfi_restore: r17
+// 0x00020044: ld r31, +32(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: daddiu r29, r29, 40
+// 0x0002004c: .cfi_def_cfa_offset: 0
+// 0x0002004c: jr r31
+// 0x00020050: nop
+// 0x00020054: .cfi_restore_state
+// 0x00020054: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8cb2cfc..cae2d3f 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,11 +20,15 @@
 #include <stdint.h>
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_x86
-#include "constant_area_fixups_x86.h"
+#include "pc_relative_fixups_x86.h"
 #endif
 
 #include "art_method-inl.h"
@@ -56,6 +60,7 @@
 #include "inliner.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/jit_code_cache.h"
 #include "licm.h"
 #include "jni/quick/jni_compiler.h"
 #include "load_store_elimination.h"
@@ -109,24 +114,23 @@
 class PassObserver : public ValueObject {
  public:
   PassObserver(HGraph* graph,
-               const char* method_name,
                CodeGenerator* codegen,
                std::ostream* visualizer_output,
                CompilerDriver* compiler_driver)
       : graph_(graph),
-        method_name_(method_name),
+        cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
-        timing_logger_(method_name, true, true),
+        timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
         visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
         visualizer_(visualizer_output, graph, *codegen),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
-      if (!IsVerboseMethod(compiler_driver, method_name)) {
+      if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
         timing_logger_enabled_ = visualizer_enabled_ = false;
       }
       if (visualizer_enabled_) {
-        visualizer_.PrintHeader(method_name_);
+        visualizer_.PrintHeader(GetMethodName());
         codegen->SetDisassemblyInformation(&disasm_info_);
       }
     }
@@ -134,7 +138,7 @@
 
   ~PassObserver() {
     if (timing_logger_enabled_) {
-      LOG(INFO) << "TIMINGS " << method_name_;
+      LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
   }
@@ -147,6 +151,14 @@
 
   void SetGraphInBadState() { graph_in_bad_state_ = true; }
 
+  const char* GetMethodName() {
+    // PrettyMethod() is expensive, so we delay calling it until we actually have to.
+    if (cached_method_name_.empty()) {
+      cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+    }
+    return cached_method_name_.c_str();
+  }
+
  private:
   void StartPass(const char* pass_name) {
     // Dump graph first, then start timer.
@@ -205,7 +217,8 @@
   }
 
   HGraph* const graph_;
-  const char* method_name_;
+
+  std::string cached_method_name_;
 
   bool timing_logger_enabled_;
   TimingLogger timing_logger_;
@@ -258,15 +271,6 @@
                           const DexFile& dex_file,
                           Handle<mirror::DexCache> dex_cache) const OVERRIDE;
 
-  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
-                             uint32_t access_flags,
-                             InvokeType invoke_type,
-                             uint16_t class_def_idx,
-                             uint32_t method_idx,
-                             jobject class_loader,
-                             const DexFile& dex_file,
-                             Handle<mirror::DexCache> dex_cache) const;
-
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE {
@@ -291,23 +295,45 @@
     }
   }
 
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+      OVERRIDE
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Whether we should run any optimization or register allocation. If false, will
   // just run the code generation after the graph was built.
   const bool run_optimizations_;
 
-  // Optimize and compile `graph`.
-  CompiledMethod* CompileOptimized(HGraph* graph,
-                                   CodeGenerator* codegen,
-                                   CompilerDriver* driver,
-                                   const DexCompilationUnit& dex_compilation_unit,
-                                   PassObserver* pass_observer) const;
+  // Create a 'CompiledMethod' for an optimized graph.
+  CompiledMethod* EmitOptimized(ArenaAllocator* arena,
+                                CodeVectorAllocator* code_allocator,
+                                CodeGenerator* codegen,
+                                CompilerDriver* driver) const;
 
-  // Just compile without doing optimizations.
-  CompiledMethod* CompileBaseline(CodeGenerator* codegen,
-                                  CompilerDriver* driver,
-                                  const DexCompilationUnit& dex_compilation_unit,
-                                  PassObserver* pass_observer) const;
+  // Create a 'CompiledMethod' for a non-optimized graph.
+  CompiledMethod* EmitBaseline(ArenaAllocator* arena,
+                               CodeVectorAllocator* code_allocator,
+                               CodeGenerator* codegen,
+                               CompilerDriver* driver) const;
+
+  // Try compiling a method and return the code generator used for
+  // compiling it.
+  // This method:
+  // 1) Builds the graph. Returns null if it failed to build it.
+  // 2) If `run_optimizations_` is set:
+  //    2.1) Transforms the graph to SSA. Returns null if that failed.
+  //    2.2) Runs optimizations on the graph, including the register allocator.
+  // 3) Generates code with the provided `code_allocator`.
+  CodeGenerator* TryCompile(ArenaAllocator* arena,
+                            CodeVectorAllocator* code_allocator,
+                            const DexFile::CodeItem* code_item,
+                            uint32_t access_flags,
+                            InvokeType invoke_type,
+                            uint16_t class_def_idx,
+                            uint32_t method_idx,
+                            jobject class_loader,
+                            const DexFile& dex_file,
+                            Handle<mirror::DexCache> dex_cache) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -369,6 +395,15 @@
       || instruction_set == kX86_64;
 }
 
+// Read barriers are supported on ARM, ARM64, x86 and x86-64 at the moment.
+// TODO: Add support for other architectures and remove this function.
+static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
+  return instruction_set == kArm64
+      || instruction_set == kThumb2
+      || instruction_set == kX86
+      || instruction_set == kX86_64;
+}
+
 static void RunOptimizations(HOptimization* optimizations[],
                              size_t length,
                              PassObserver* pass_observer) {
@@ -391,20 +426,9 @@
   if (!should_inline) {
     return;
   }
-
-  ArenaAllocator* arena = graph->GetArena();
-  HInliner* inliner = new (arena) HInliner(
+  HInliner* inliner = new (graph->GetArena()) HInliner(
     graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
-  ReferenceTypePropagation* type_propagation =
-    new (arena) ReferenceTypePropagation(graph, handles,
-        "reference_type_propagation_after_inlining");
-
-  HOptimization* optimizations[] = {
-    inliner,
-    // Run another type propagation phase: inlining will open up more opportunities
-    // to remove checkcast/instanceof and null checks.
-    type_propagation,
-  };
+  HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
 }
@@ -415,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
@@ -432,10 +467,9 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86: {
-      x86::ConstantAreaFixups* constant_area_fixups =
-          new (arena) x86::ConstantAreaFixups(graph, stats);
+      x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, stats);
       HOptimization* x86_optimizations[] = {
-        constant_area_fixups
+        pc_relative_fixups
       };
       RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
       break;
@@ -446,13 +480,32 @@
   }
 }
 
+NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
+static void AllocateRegisters(HGraph* graph,
+                              CodeGenerator* codegen,
+                              PassObserver* pass_observer) {
+  PrepareForRegisterAllocation(graph).Run();
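+  // PrepareForRegisterAllocation rewrites the graph into a form the register
+  // allocator can handle, e.g. by folding explicit clinit checks into their
+  // invokes (see HInvokeStaticOrDirect::RemoveExplicitClinitCheck()).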
+  SsaLivenessAnalysis liveness(graph, codegen);
+  {
+    PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
+    liveness.Analyze();
+  }
+  {
+    PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
+    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+  }
+}
+
 static void RunOptimizations(HGraph* graph,
                              CodeGenerator* codegen,
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
                              const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
+                             PassObserver* pass_observer) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  ScopedThreadSuspension sts(soa.Self(), kNative);
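+  // The handle scope must be created while the thread is runnable; the passes
+  // themselves run in native state so that long-running compilation does not
+  // block thread suspension (e.g. for GC).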
+
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -462,14 +515,15 @@
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   ReferenceTypePropagation* type_propagation =
-      new (arena) ReferenceTypePropagation(graph, handles);
+      new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_types");
@@ -492,12 +546,13 @@
 
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
 
   // TODO: Update passes incompatible with try/catch so we have the same
   //       pipeline for all methods.
   if (graph->HasTryCatch()) {
     HOptimization* optimizations2[] = {
+      boolean_simplify,
       side_effects,
       gvn,
       dce2,
@@ -519,6 +574,7 @@
       licm,
       induction,
       bce,
+      fold3,  // evaluates code generated by dynamic bce
       simplify3,
       lse,
       dce2,
@@ -532,6 +588,7 @@
   }
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
+  AllocateRegisters(graph, codegen, pass_observer);
 }
 
 // The stack map we generate must be 4-byte aligned on ARM. Since existing
@@ -545,22 +602,6 @@
   return ArrayRef<const uint8_t>(vector);
 }
 
-NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
-static void AllocateRegisters(HGraph* graph,
-                              CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
-  PrepareForRegisterAllocation(graph).Run();
-  SsaLivenessAnalysis liveness(graph, codegen);
-  {
-    PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
-    liveness.Analyze();
-  }
-  {
-    PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
-  }
-}
-
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -574,74 +615,40 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
-                                                     CodeGenerator* codegen,
-                                                     CompilerDriver* compiler_driver,
-                                                     const DexCompilationUnit& dex_compilation_unit,
-                                                     PassObserver* pass_observer) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  soa.Self()->TransitionFromRunnableToSuspended(kNative);
-  RunOptimizations(graph,
-                   codegen,
-                   compiler_driver,
-                   compilation_stats_.get(),
-                   dex_compilation_unit,
-                   pass_observer,
-                   &handles);
-
-  AllocateRegisters(graph, codegen, pass_observer);
-
-  ArenaAllocator* arena = graph->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileOptimized(&allocator);
-
+CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
+                                                  CodeVectorAllocator* code_allocator,
+                                                  CodeGenerator* codegen,
+                                                  CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
-  codegen->BuildStackMaps(&stack_map);
-
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
+  stack_map.resize(codegen->ComputeStackMapsSize());
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
+      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(src_mapping_table),
+      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
       ArrayRef<const uint8_t>(),  // mapping_table.
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(),  // native_gc_map.
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
 
-  soa.Self()->TransitionFromSuspendedToRunnable();
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::CompileBaseline(
+CompiledMethod* OptimizingCompiler::EmitBaseline(
+    ArenaAllocator* arena,
+    CodeVectorAllocator* code_allocator,
     CodeGenerator* codegen,
-    CompilerDriver* compiler_driver,
-    const DexCompilationUnit& dex_compilation_unit,
-    PassObserver* pass_observer) const {
-  ArenaAllocator* arena = codegen->GetGraph()->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileBaseline(&allocator);
-
+    CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
 
   ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
@@ -649,44 +656,43 @@
   ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildVMapTable(&vmap_table);
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
+  codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
+      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(src_mapping_table),
+      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
       AlignVectorSize(mapping_table),
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
-                                               uint32_t access_flags,
-                                               InvokeType invoke_type,
-                                               uint16_t class_def_idx,
-                                               uint32_t method_idx,
-                                               jobject class_loader,
-                                               const DexFile& dex_file,
-                                               Handle<mirror::DexCache> dex_cache) const {
-  std::string method_name = PrettyMethod(method_idx, dex_file);
+CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
+                                              CodeVectorAllocator* code_allocator,
+                                              const DexFile::CodeItem* code_item,
+                                              uint32_t access_flags,
+                                              InvokeType invoke_type,
+                                              uint16_t class_def_idx,
+                                              uint32_t method_idx,
+                                              jobject class_loader,
+                                              const DexFile& dex_file,
+                                              Handle<mirror::DexCache> dex_cache) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
 
-  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
-  // overflow checks) assume thumb2.
+  // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
   if (instruction_set == kArm) {
     instruction_set = kThumb2;
   }
@@ -697,6 +703,12 @@
     return nullptr;
   }
 
+  // When read barriers are enabled, do not attempt to compile for
+  // instruction sets that have no read barrier support.
+  if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) {
+    return nullptr;
+  }
+
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
@@ -721,13 +733,10 @@
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
-  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
-  HGraph* graph = new (&arena) HGraph(
-      &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
+  HGraph* graph = new (arena) HGraph(
+      arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
       kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
 
-  bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
-
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
                             instruction_set,
@@ -741,7 +750,6 @@
       compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
 
   PassObserver pass_observer(graph,
-                             method_name.c_str(),
                              codegen.get(),
                              visualizer_output_.get(),
                              compiler_driver);
@@ -769,7 +777,7 @@
                         interpreter_metadata,
                         dex_cache);
 
-  VLOG(compiler) << "Building " << method_name;
+  VLOG(compiler) << "Building " << pass_observer.GetMethodName();
 
   {
     PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
@@ -779,58 +787,40 @@
     }
   }
 
-  bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
-
-  // `run_optimizations_` is set explicitly (either through a compiler filter
-  // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
-  // to Quick.
-  bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
-  CompiledMethod* compiled_method = nullptr;
-  if (run_optimizations_ && can_allocate_registers) {
-    VLOG(compiler) << "Optimizing " << method_name;
-
+  VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
+  if (run_optimizations_) {
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
       if (!graph->TryBuildingSsa()) {
        // We could not transform the graph to SSA; bail out.
-        LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
+        LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
+            << ": it contains a non natural loop";
         MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
         pass_observer.SetGraphInBadState();
         return nullptr;
       }
     }
 
-    compiled_method = CompileOptimized(graph,
-                                       codegen.get(),
-                                       compiler_driver,
-                                       dex_compilation_unit,
-                                       &pass_observer);
-  } else if (shouldOptimize && can_allocate_registers) {
-    LOG(FATAL) << "Could not allocate registers in optimizing compiler";
-    UNREACHABLE();
-  } else if (can_use_baseline) {
-    VLOG(compiler) << "Compile baseline " << method_name;
-
-    if (!run_optimizations_) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
-    } else if (!can_allocate_registers) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
-    }
-
-    compiled_method = CompileBaseline(codegen.get(),
-                                      compiler_driver,
-                                      dex_compilation_unit,
-                                      &pass_observer);
+    RunOptimizations(graph,
+                     codegen.get(),
+                     compiler_driver,
+                     compilation_stats_.get(),
+                     dex_compilation_unit,
+                     &pass_observer);
+    codegen->CompileOptimized(code_allocator);
+  } else {
+    codegen->CompileBaseline(code_allocator);
   }
+  pass_observer.DumpDisassembly();
 
   if (kArenaAllocatorCountAllocations) {
-    if (arena.BytesAllocated() > 4 * MB) {
-      MemStats mem_stats(arena.GetMemStats());
+    if (arena->BytesAllocated() > 4 * MB) {
+      MemStats mem_stats(arena->GetMemStats());
       LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
-  return compiled_method;
+  return codegen.release();
 }
 
 static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
@@ -852,33 +842,50 @@
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
   CompiledMethod* method = nullptr;
-  if (Runtime::Current()->IsAotCompiler()) {
-    const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
-    DCHECK(!verified_method->HasRuntimeThrow());
-    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-        || CanHandleVerificationFailure(verified_method)) {
-       method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                           method_idx, jclass_loader, dex_file, dex_cache);
-    } else {
-      if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+  DCHECK(!verified_method->HasRuntimeThrow());
+  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
+      || CanHandleVerificationFailure(verified_method)) {
+    ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+    CodeVectorAllocator code_allocator(&arena);
+    std::unique_ptr<CodeGenerator> codegen(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   dex_file,
+                   dex_cache));
+    if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
+      if (run_optimizations_) {
+        method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+        method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
       }
     }
   } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                        method_idx, jclass_loader, dex_file, dex_cache);
+    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+    } else {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+    }
   }
 
   if (kIsDebugBuild &&
       IsCompilingWithCoreImage() &&
-      IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
-    // For testing purposes, we put a special marker on method names that should be compiled
-    // with this compiler. This makes sure we're not regressing.
+      IsInstructionSetSupported(compiler_driver->GetInstructionSet()) &&
+      (!kEmitCompilerReadBarrier ||
+       InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) {
+    // For testing purposes, we put a special marker on method names
+      // that should be compiled with this compiler (when the
+    // instruction set is supported -- and has support for read
+    // barriers, if they are enabled). This makes sure we're not
+    // regressing.
     std::string method_name = PrettyMethod(method_idx, dex_file);
     bool shouldCompile = method_name.find("$opt$") != std::string::npos;
     DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
@@ -896,4 +903,70 @@
   return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
 }
 
+bool OptimizingCompiler::JitCompile(Thread* self,
+                                    jit::JitCodeCache* code_cache,
+                                    ArtMethod* method) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      method->GetDeclaringClass()->GetClassLoader()));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+
+  jobject jclass_loader = class_loader.ToJObject();
+  const DexFile* dex_file = method->GetDexFile();
+  const uint16_t class_def_idx = method->GetClassDefIndex();
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+  const uint32_t method_idx = method->GetDexMethodIndex();
+  const uint32_t access_flags = method->GetAccessFlags();
+  const InvokeType invoke_type = method->GetInvokeType();
+
+  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+  CodeVectorAllocator code_allocator(&arena);
+  std::unique_ptr<CodeGenerator> codegen;
+  {
+    // Go to native so that we don't block GC during compilation.
+    ScopedThreadSuspension sts(self, kNative);
+
+    DCHECK(run_optimizations_);
+    codegen.reset(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   *dex_file,
+                   dex_cache));
+    if (codegen.get() == nullptr) {
+      return false;
+    }
+  }
+
+  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size);
+  if (stack_map_data == nullptr) {
+    return false;
+  }
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  const void* code = code_cache->CommitCode(
+      self,
+      method,
+      nullptr,
+      stack_map_data,
+      nullptr,
+      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+      codegen->GetCoreSpillMask(),
+      codegen->GetFpuSpillMask(),
+      code_allocator.GetMemory().data(),
+      code_allocator.GetSize());
+
+  if (code == nullptr) {
+    code_cache->ClearData(self, stack_map_data);
+    return false;
+  }
+
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..e5ea0f5 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,18 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +47,8 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
   kLastStat
 };
 
@@ -66,20 +61,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +86,38 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCompiled : name = "Compiled"; break;
+      case kInlinedInvoke : name = "InlinedInvoke"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+      case kUnresolvedField : name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
similarity index 65%
rename from compiler/optimizing/constant_area_fixups_x86.cc
rename to compiler/optimizing/pc_relative_fixups_x86.cc
index c347000..b383f1e 100644
--- a/compiler/optimizing/constant_area_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "constant_area_fixups_x86.h"
+#include "pc_relative_fixups_x86.h"
 
 namespace art {
 namespace x86 {
@@ -22,9 +22,18 @@
 /**
  * Finds instructions that need the constant area base as an input.
  */
-class ConstantHandlerVisitor : public HGraphVisitor {
+class PCRelativeHandlerVisitor : public HGraphVisitor {
  public:
-  explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+  explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
 
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
@@ -72,7 +81,7 @@
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializeConstantAreaPointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,31 +93,38 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializeConstantAreaPointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block; it is moved to a
+    // better position later, in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializeConstantAreaPointer(insn);
-    HGraph* graph = GetGraph();
-    HBasicBlock* block = insn->GetBlock();
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
-        new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
-    block->InsertInstructionBefore(load_constant, insn);
+        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
+    insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
     insn->ReplaceInput(load_constant, input_index);
   }
 
   void HandleInvoke(HInvoke* invoke) {
+    // If this is an invoke-static/-direct with PC-relative dex cache array
+    // addressing, we need the PC-relative address base.
+    HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+    if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
+      InitializePCRelativeBasePointer();
+      // Add the extra parameter base_.
+      DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
+      invoke_static_or_direct->AddSpecialInput(base_);
+    }
     // Ensure that we can load FP arguments from the constant area.
     for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
       HConstant* input = invoke->InputAt(i)->AsConstant();
@@ -123,9 +139,10 @@
   HX86ComputeBaseMethodAddress* base_;
 };
 
-void ConstantAreaFixups::Run() {
-  ConstantHandlerVisitor visitor(graph_);
+void PcRelativeFixups::Run() {
+  PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
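
The pass runs the visitor first so that every user of the base is known, then performs the motion. A rough sketch of the motion itself over a flat instruction list (illustrative types only; the real `MoveBeforeFirstUserAndOutOfLoops()` operates on the HIR and its loop information):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct Instr {
  int id;
  bool uses_base;
};

// Move insns[0] (the base computation) to just before its first user, but
// never into the loop spanning [loop_begin, loop_end): if the first user is
// inside the loop, stop at the loop's first instruction instead. Assumes
// loop_begin >= 1, i.e. the loop starts after the entry position.
void MoveBaseBeforeFirstUser(std::vector<Instr>* insns,
                             size_t loop_begin, size_t loop_end) {
  auto first_user = std::find_if(insns->begin() + 1, insns->end(),
                                 [](const Instr& i) { return i.uses_base; });
  if (first_user == insns->end()) {
    return;  // no user: leave the base where it is
  }
  size_t target = static_cast<size_t>(first_user - insns->begin());
  if (target >= loop_begin && target < loop_end) {
    target = loop_begin;  // hoist out of the loop
  }
  Instr base = insns->front();
  insns->erase(insns->begin());
  // Indices shifted down by one after the erase.
  insns->insert(insns->begin() + static_cast<ptrdiff_t>(target - 1), base);
}
```
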
 
 }  // namespace x86
diff --git a/compiler/optimizing/constant_area_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
similarity index 67%
rename from compiler/optimizing/constant_area_fixups_x86.h
rename to compiler/optimizing/pc_relative_fixups_x86.h
index 4138039..af708ac 100644
--- a/compiler/optimizing/constant_area_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
-#define ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
 
 #include "nodes.h"
 #include "optimization.h"
@@ -23,10 +23,10 @@
 namespace art {
 namespace x86 {
 
-class ConstantAreaFixups : public HOptimization {
+class PcRelativeFixups : public HOptimization {
  public:
-  ConstantAreaFixups(HGraph* graph, OptimizingCompilerStats* stats)
-      : HOptimization(graph, "constant_area_fixups_x86", stats) {}
+  PcRelativeFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "pc_relative_fixups_x86", stats) {}
 
   void Run() OVERRIDE;
 };
@@ -34,4 +34,4 @@
 }  // namespace x86
 }  // namespace art
 
-#endif  // ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#endif  // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index ca928ae..d1770b7 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -48,16 +48,85 @@
 }
 
 void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
-  HLoadClass* cls = check->GetLoadClass();
-  check->ReplaceWith(cls);
-  if (check->GetPrevious() == cls) {
+  // Try to find a static invoke or a new-instance from which this check originated.
+  HInstruction* implicit_clinit = nullptr;
+  for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
+    HInstruction* user = it.Current()->GetUser();
+    if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
+        CanMoveClinitCheck(check, user)) {
+      implicit_clinit = user;
+      if (user->IsInvokeStaticOrDirect()) {
+        DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck());
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+      } else {
+        DCHECK(user->IsNewInstance());
+        // We delegate the initialization duty to the allocation.
+        if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) {
+          user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved);
+        }
+      }
+      break;
+    }
+  }
+  // If we found a static invoke or new-instance for merging, remove the check
+  // from dominated static invokes.
+  if (implicit_clinit != nullptr) {
+    for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
+      HInstruction* user = it.Current()->GetUser();
+      // All other uses must be dominated.
+      DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
+      // Advance before we remove the node; the reference to the next node is preserved.
+      it.Advance();
+      if (user->IsInvokeStaticOrDirect()) {
+        user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+            HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+      }
+    }
+  }
+
+  HLoadClass* load_class = check->GetLoadClass();
+  bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check);
+
+  check->ReplaceWith(load_class);
+
+  if (implicit_clinit != nullptr) {
+    // Remove the check from the graph. It has been merged into the invoke or new-instance.
+    check->GetBlock()->RemoveInstruction(check);
+    // Check if we can merge the load class as well.
+    if (can_merge_with_load_class && !load_class->HasUses()) {
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  } else if (can_merge_with_load_class) {
     // Pass the initialization duty to the `HLoadClass` instruction,
     // and remove the instruction from the graph.
-    cls->SetMustGenerateClinitCheck(true);
+    load_class->SetMustGenerateClinitCheck(true);
     check->GetBlock()->RemoveInstruction(check);
   }
 }
 
+void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
+  HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
+  bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  // Change the entrypoint to kQuickAllocObject if either:
+  // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
+  // - the class needs access checks (we do not know if it's finalizable),
+  // - or the load class has only one use.
+  if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
+    instruction->SetEntrypoint(kQuickAllocObject);
+    instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+    // The allocation entry point that deals with access checks does not work with inlined
+    // methods, so we need to check whether this allocation comes from an inlined method.
+    if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) {
+      // We can remove the load class from the graph. If it needed access checks, we delegate
+      // the access check to the allocation.
+      if (load_class->NeedsAccessCheck()) {
+        instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+      }
+      load_class->GetBlock()->RemoveInstruction(load_class);
+    }
+  }
+}
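
The conditions above collapse to a small decision table. A hedged sketch follows (the enum values mirror the quick entrypoints named in the code; the helper itself is illustrative, not ART API):

```cpp
// Illustrative summary of the entrypoint choice in VisitNewInstance().
enum Entrypoint {
  kKeepCurrent,                      // none of the conditions applied
  kQuickAllocObject,                 // generic path; handles finalizable
  kQuickAllocObjectWithAccessCheck,  // allocation also performs the check
};

Entrypoint ChooseAllocEntrypoint(bool finalizable,
                                 bool needs_access_check,
                                 bool load_class_has_single_use,
                                 bool from_inlined_invoke) {
  if (!finalizable && !needs_access_check && !load_class_has_single_use) {
    return kKeepCurrent;
  }
  if (load_class_has_single_use && !from_inlined_invoke && needs_access_check) {
    // The HLoadClass can be removed; the allocation takes over the check.
    return kQuickAllocObjectWithAccessCheck;
  }
  return kQuickAllocObject;
}
```
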
+
 void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
   bool needs_materialization = false;
   if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
@@ -86,30 +155,60 @@
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
-    // Remove a load class instruction as last input of a static
-    // invoke, which has been added (along with a clinit check,
-    // removed by PrepareForRegisterAllocation::VisitClinitCheck
-    // previously) by the graph builder during the creation of the
-    // static invoke instruction, but is no longer required at this
-    // stage (i.e., after inlining has been performed).
-    invoke->RemoveLoadClassAsLastInput();
+    // Detach the explicit class initialization check from the invoke.
+    // Keeping track of the initializing instruction is no longer required
+    // at this stage (i.e., after inlining has been performed).
+    invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
 
-    // The static call will initialize the class so there's no need for a clinit check if
-    // it's the first user.
-    // There is one special case where we still need the clinit check, when inlining. Because
-    // currently the callee is responsible for reporting parameters to the GC, the code
-    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
-    // Therefore we cannot allocate any object in that code, including loading a new class.
-    if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
-      last_input->SetMustGenerateClinitCheck(false);
+    // Merging with load class should have happened in VisitClinitCheck().
+    DCHECK(!CanMoveClinitCheck(last_input, invoke));
+  }
+}
 
-      // If the load class instruction is no longer used, remove it from
-      // the graph.
-      if (!last_input->HasUses()) {
-        last_input->GetBlock()->RemoveInstruction(last_input);
-      }
+bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) {
+  // Determine if input and user come from the same dex instruction, so that we can move
+  // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
+  // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user).
+
+  // Start with a quick dex pc check.
+  if (user->GetDexPc() != input->GetDexPc()) {
+    return false;
+  }
+
+  // Now do a thorough environment check that this is really coming from the same instruction in
+  // the same inlined graph. Unfortunately, we have to go through the whole environment chain.
+  HEnvironment* user_environment = user->GetEnvironment();
+  HEnvironment* input_environment = input->GetEnvironment();
+  while (user_environment != nullptr || input_environment != nullptr) {
+    if (user_environment == nullptr || input_environment == nullptr) {
+      // Different environment chain lengths. This happens when a method is called
+      // once directly and once indirectly through another inlined method.
+      return false;
+    }
+    if (user_environment->GetDexPc() != input_environment->GetDexPc() ||
+        user_environment->GetMethodIdx() != input_environment->GetMethodIdx() ||
+        !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) {
+      return false;
+    }
+    user_environment = user_environment->GetParent();
+    input_environment = input_environment->GetParent();
+  }
+
+  // Check for code motion taking the input to a different block.
+  if (user->GetBlock() != input->GetBlock()) {
+    return false;
+  }
+
+  // In debug mode, check that we have not inserted a throwing instruction
+  // or an instruction with side effects between input and user.
+  if (kIsDebugBuild) {
+    for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+      CHECK(between != nullptr);  // User must be after input in the same block.
+      CHECK(!between->CanThrow());
+      CHECK(!between->HasSideEffects());
     }
   }
+  return true;
 }
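
The chain walk above is essentially a pairwise comparison of two linked lists of inlining frames. A minimal sketch, with `Env` as a hypothetical stand-in for HEnvironment (the dex file is reduced to an id in place of the `IsSameDexFile()` comparison):

```cpp
#include <cstdint>

// Hypothetical stand-in for HEnvironment: one node per inlining frame.
struct Env {
  uint32_t dex_pc;
  uint32_t method_idx;
  int dex_file_id;   // stands in for the IsSameDexFile() comparison
  const Env* parent;
};

// Two instructions come from the same dex instruction only if their whole
// environment chains match frame by frame.
bool SameInliningContext(const Env* a, const Env* b) {
  while (a != nullptr || b != nullptr) {
    if (a == nullptr || b == nullptr) {
      return false;  // different depths: inlined through different paths
    }
    if (a->dex_pc != b->dex_pc ||
        a->method_idx != b->method_idx ||
        a->dex_file_id != b->dex_file_id) {
      return false;
    }
    a = a->parent;
    b = b->parent;
  }
  return true;
}
```
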
 
 }  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index d7f277f..9b24342 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,9 @@
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
+
+  bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
 
   DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
 };
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index c98f43e..bde54ee 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -63,7 +63,6 @@
             : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
         phi->ReplaceInput(equivalent, i);
         if (equivalent->IsPhi()) {
-          equivalent->AsPhi()->SetLive();
           AddToWorklist(equivalent->AsPhi());
         } else if (equivalent == input) {
           // The input has changed its type. It can be an input of other phis,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 659da06..dd34924 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -99,17 +99,9 @@
   }
 }
 
-void ReferenceTypePropagation::Run() {
-  // To properly propagate type info we need to visit in the dominator-based order.
-  // Reverse post order guarantees a node's dominators are visited first.
-  // We take advantage of this order in `VisitBasicBlock`.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
-  }
-  ProcessWorklist();
-
+void ReferenceTypePropagation::ValidateTypes() {
+  // TODO: move this to the graph checker.
   if (kIsDebugBuild) {
-    // TODO: move this to the graph checker.
     ScopedObjectAccess soa(Thread::Current());
     for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
       HBasicBlock* block = it.Current();
@@ -135,6 +127,100 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // Null constants are the only inputs of `root_instr` allowed to have
+      // valid RTI; they are ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
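
In SSA form, cycles in the def-use web can only pass through phis, which is why only phis go into the visited set above; other inputs can be pushed unconditionally. A simplified sketch of the traversal (a generic `Node` instead of HInstruction, and without the per-kind filtering the real check performs):

```cpp
#include <unordered_set>
#include <vector>

struct Node {
  bool is_phi;
  std::vector<const Node*> inputs;
};

void WalkInputs(const Node* root) {
  std::unordered_set<const Node*> visited_phis;  // only phis can form cycles
  std::vector<const Node*> worklist;
  worklist.push_back(root);
  while (!worklist.empty()) {
    const Node* node = worklist.back();
    worklist.pop_back();
    for (const Node* input : node->inputs) {
      // insert().second is false if this phi was already queued once.
      if (input->is_phi && !visited_phis.insert(input).second) {
        continue;
      }
      worklist.push_back(input);
    }
  }
}
```
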
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fixed-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Check that any instruction whose RTI we are about to set from invalid
+    // to Object has no typed instructions in its def-use chain, i.e. that
+    // its type genuinely could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
+void ReferenceTypePropagation::Run() {
+  // To properly propagate type info we need to visit in the dominator-based order.
+  // Reverse post order guarantees a node's dominators are visited first.
+  // We take advantage of this order in `VisitBasicBlock`.
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    VisitBasicBlock(it.Current());
+  }
+
+  ProcessWorklist();
+  SetUntypedInstructionsToObject();
+  ValidateTypes();
+}
+
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
   RTPVisitor visitor(graph_,
                      handles_,
@@ -530,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -584,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non-back-edge input for
     // reaching a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -610,23 +702,36 @@
   }
 
   bool is_exact = a.IsExact() && b.IsExact();
-  Handle<mirror::Class> type_handle;
+  ReferenceTypeInfo::TypeHandle result_type_handle;
+  ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle();
+  ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle();
+  bool a_is_interface = a_type_handle->IsInterface();
+  bool b_is_interface = b_type_handle->IsInterface();
 
   if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
   } else if (a.IsSupertypeOf(b)) {
-    type_handle = a.GetTypeHandle();
+    result_type_handle = a_type_handle;
     is_exact = false;
   } else if (b.IsSupertypeOf(a)) {
-    type_handle = b.GetTypeHandle();
+    result_type_handle = b_type_handle;
+    is_exact = false;
+  } else if (!a_is_interface && !b_is_interface) {
+    result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
     is_exact = false;
   } else {
-    // TODO: Find the first common super class.
-    type_handle = object_class_handle_;
+    // This can happen if:
+    //    - both types are interfaces. TODO(calin): implement
+    //    - one is an interface, the other a class, and the class does not implement the interface
+    //      e.g:
+    //        void foo(Interface i, boolean cond) {
+    //          Object o = cond ? i : new Object();
+    //        }
+    result_type_handle = object_class_handle_;
     is_exact = false;
   }
 
-  return ReferenceTypeInfo::Create(type_handle, is_exact);
+  return ReferenceTypeInfo::Create(result_type_handle, is_exact);
 }
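
For two unrelated classes the merge climbs to the first common superclass; only when an interface is involved does it fall back to Object. A toy version of the common-superclass lookup over a chain-to-Object hierarchy (illustrative `Klass`, not mirror::Class):

```cpp
#include <cassert>
#include <cstddef>

struct Klass {
  const Klass* super;  // nullptr only for the Object root
  bool is_interface;
};

size_t Depth(const Klass* k) {
  size_t depth = 0;
  for (; k->super != nullptr; k = k->super) {
    ++depth;
  }
  return depth;
}

// First common superclass of two non-interface classes: equalize depths,
// then climb both chains in lockstep until they meet (at worst at Object).
const Klass* CommonSuperClass(const Klass* a, const Klass* b) {
  assert(!a->is_interface && !b->is_interface);
  size_t da = Depth(a);
  size_t db = Depth(b);
  for (; da > db; --da) a = a->super;
  for (; db > da; --db) b = b->super;
  while (a != b) {
    a = a->super;
    b = b->super;
  }
  return a;
}
```

The result is never exact unless both sides were the same type, which matches the `is_exact = false` on every widening branch above.
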
 
 static void UpdateArrayGet(HArrayGet* instr,
@@ -636,7 +741,9 @@
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
+  if (!parent_rti.IsValid()) {
+    return;
+  }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
@@ -648,8 +755,6 @@
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
   }
-
-  return;
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -666,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -694,6 +799,7 @@
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -715,14 +821,35 @@
   instr->SetReferenceTypeInfo(new_rti);
 }
 
+// NullConstant inputs are ignored during merging as they do not provide any useful information.
+// If all the inputs are NullConstants then the type of the phi will be set to Object.
 void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
-  ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+  size_t input_count = instr->InputCount();
+  size_t first_input_index_not_null = 0;
+  while (first_input_index_not_null < input_count &&
+      instr->InputAt(first_input_index_not_null)->IsNullConstant()) {
+    first_input_index_not_null++;
+  }
+  if (first_input_index_not_null == input_count) {
+    // All inputs are NullConstants; set the type to Object.
+    // This may happen in the presence of inlining.
+    instr->SetReferenceTypeInfo(
+        ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+    return;
+  }
+
+  ReferenceTypeInfo new_rti = instr->InputAt(first_input_index_not_null)->GetReferenceTypeInfo();
+
   if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) {
     // Early return if we are Object and inexact.
     instr->SetReferenceTypeInfo(new_rti);
     return;
   }
-  for (size_t i = 1; i < instr->InputCount(); i++) {
+
+  for (size_t i = first_input_index_not_null + 1; i < input_count; i++) {
+    if (instr->InputAt(i)->IsNullConstant()) {
+      continue;
+    }
     new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
     if (new_rti.IsValid() && new_rti.IsObjectClass()) {
       if (!new_rti.IsExact()) {
@@ -732,7 +859,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
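
A compact model of the loop above, with an empty `std::optional` standing in for a NullConstant input and a two-level toy lattice in place of ReferenceTypeInfo (all names illustrative):

```cpp
#include <optional>
#include <vector>

enum class Ty { Object, A, B };  // toy lattice: distinct types merge to Object

Ty Merge(Ty x, Ty y) { return x == y ? x : Ty::Object; }

// Empty optional == NullConstant input: skipped during the merge.
// If every input is a NullConstant, default to Object, as above.
Ty MergePhiInputs(const std::vector<std::optional<Ty>>& inputs) {
  std::optional<Ty> result;
  for (const std::optional<Ty>& input : inputs) {
    if (!input.has_value()) {
      continue;  // ignore NullConstant inputs
    }
    result = result.has_value() ? Merge(*result, *input) : *input;
    if (*result == Ty::Object) {
      break;  // already at the top of the lattice; no point merging further
    }
  }
  return result.value_or(Ty::Object);
}
```
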
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5493601..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -56,6 +56,9 @@
   ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  void ValidateTypes();
+  void SetUntypedInstructionsToObject();
+
   StackHandleScopeCollection* handles_;
 
   ArenaVector<HInstruction*> worklist_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index ef22c81..d399bc2 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1525,7 +1525,7 @@
   DCHECK(IsValidDestination(destination)) << destination;
   if (source.Equals(destination)) return;
 
-  DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u);
+  DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
   HInstruction* last = block->GetLastInstruction();
   // We insert moves at exit for phi predecessors and connecting blocks.
   // A block ending with an if or a packed switch cannot branch to a block
@@ -1752,7 +1752,7 @@
 
   // If `from` has only one successor, we can put the moves at the exit of it. Otherwise
   // we need to put the moves at the entry of `to`.
-  if (from->NumberOfNormalSuccessors() == 1) {
+  if (from->GetNormalSuccessors().size() == 1) {
     InsertParallelMoveAtExitOf(from,
                                interval->GetParent()->GetDefinedBy(),
                                source->ToLocation(),
@@ -1894,7 +1894,7 @@
         HInstruction* phi = inst_it.Current();
         for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
           HBasicBlock* predecessor = current->GetPredecessors()[i];
-          DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
+          DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
           HInstruction* input = phi->InputAt(i);
           Location source = input->GetLiveInterval()->GetLocationAt(
               predecessor->GetLifetimeEnd() - 1);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079..5e1d1d9 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@
   }
 
   // TODO: Avoid CompilerDriver.
-  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
   MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
   int vtable_idx;
   uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@
       invoke->GetDexPc(),
       false /* update_stats: already updated in builder */,
       true /* enable_devirtualization */,
-      &invoke_type,
+      &optimized_invoke_type,
       &target_method,
       &vtable_idx,
       &direct_code,
       &direct_method);
-  DCHECK(success);
-  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
-  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
-  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+  if (!success) {
+    // TODO: try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen.
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
 
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index ec45d6b..9bbc354 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -129,13 +129,13 @@
 
 TEST(SideEffectsTest, VolatileDependences) {
   SideEffects volatile_write =
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, true);
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true);
   SideEffects any_write =
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false);
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false);
   SideEffects volatile_read =
-      SideEffects::FieldReadOfType(Primitive::kPrimByte, true);
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true);
   SideEffects any_read =
-      SideEffects::FieldReadOfType(Primitive::kPrimByte, false);
+      SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false);
 
   EXPECT_FALSE(volatile_write.MayDependOn(any_read));
   EXPECT_TRUE(any_read.MayDependOn(volatile_write));
@@ -151,15 +151,15 @@
 TEST(SideEffectsTest, SameWidthTypes) {
   // Type I/F.
   testWriteAndReadDependence(
-      SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
-      SideEffects::FieldReadOfType(Primitive::kPrimFloat, false));
+      SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false),
+      SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false));
   testWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
       SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   // Type L/D.
   testWriteAndReadDependence(
-      SideEffects::FieldWriteOfType(Primitive::kPrimLong, false),
-      SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
+      SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false),
+      SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false));
   testWriteAndReadDependence(
       SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
@@ -171,9 +171,9 @@
   for (Primitive::Type type = Primitive::kPrimNot;
         type < Primitive::kPrimVoid;
         type = Primitive::Type(type + 1)) {
-    s = s.Union(SideEffects::FieldWriteOfType(type, false));
+    s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false));
     s = s.Union(SideEffects::ArrayWriteOfType(type));
-    s = s.Union(SideEffects::FieldReadOfType(type, false));
+    s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false));
     s = s.Union(SideEffects::ArrayReadOfType(type));
   }
   EXPECT_TRUE(s.DoesAllReadWrite());
@@ -225,10 +225,10 @@
       "||DJ|||||",  // note: DJ alias
       SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
   SideEffects s = SideEffects::None();
-  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false));
-  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false));
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false));
+  s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort));
-  s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false));
+  s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
   s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
   EXPECT_STREQ(
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 4565590..9e6cfbe 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -22,6 +22,13 @@
 
 namespace art {
 
+// Returns whether this is a loop header phi which was eagerly created but later
+// found inconsistent due to the vreg being undefined in one of its predecessors.
+// Such a phi is marked dead and should be ignored until its removal in SsaPhiElimination.
+static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
+  return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
+}
+
 /**
  * A debuggable application may require reviving phis, to ensure their
  * associated DEX register is available to a debugger. This class implements
@@ -165,17 +172,15 @@
 void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
+    if (IsUndefinedLoopHeaderPhi(phi)) {
+      DCHECK(phi->IsDead());
+      continue;
+    }
     if (phi->IsDead() && phi->HasEnvironmentUses()) {
       phi->SetLive();
       if (block->IsLoopHeader()) {
-        // Give a type to the loop phi to guarantee convergence of the algorithm.
-        // Note that the dead phi may already have a type if it is an equivalent
-        // generated for a typed LoadLocal. In that case we do not change the
-        // type because it could lead to an unsupported PrimNot/Float/Double ->
-        // PrimInt/Long transition and create same type equivalents.
-        if (phi->GetType() == Primitive::kPrimVoid) {
-          phi->SetType(phi->InputAt(0)->GetType());
-        }
+        // Loop phis must have a type to guarantee convergence of the algorithm.
+        DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
         AddToWorklist(phi);
       } else {
         // Because we are doing a reverse post order visit, all inputs of
@@ -220,6 +225,27 @@
   ProcessWorklist();
 }
 
+void SsaBuilder::SetLoopHeaderPhiInputs() {
+  for (size_t i = loop_headers_.size(); i > 0; --i) {
+    HBasicBlock* block = loop_headers_[i - 1];
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* phi = it.Current()->AsPhi();
+      size_t vreg = phi->GetRegNumber();
+      for (HBasicBlock* predecessor : block->GetPredecessors()) {
+        HInstruction* value = ValueOfLocal(predecessor, vreg);
+        if (value == nullptr) {
+          // Vreg is undefined at this predecessor. Mark the phi dead and leave it
+          // with fewer inputs than predecessors. SsaChecker will fail unless it is removed.
+          phi->SetDead();
+          break;
+        } else {
+          phi->AddInput(value);
+        }
+      }
+    }
+  }
+}
+
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
   for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
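
The IsUndefinedLoopHeaderPhi() helper added earlier in this file relies on a single structural fact: SetLoopHeaderPhiInputs() stops adding inputs the moment a predecessor has no value for the vreg, so an undefined loop-header phi is recognizable purely from an input count smaller than the predecessor count. A standalone model of that check, with hypothetical types in place of the ART IR:

    #include <cassert>
    #include <cstddef>

    // Hypothetical stand-in for HPhi: just the two facts the check needs.
    struct Phi {
      std::size_t num_inputs;
      bool is_loop_header;
    };

    static bool IsUndefinedLoopHeaderPhi(const Phi& phi, std::size_t num_predecessors) {
      return phi.is_loop_header && phi.num_inputs != num_predecessors;
    }

    int main() {
      Phi complete = {2u, true};   // One input per predecessor: well defined.
      Phi undefined = {1u, true};  // Input filling stopped at an undefined vreg.
      assert(!IsUndefinedLoopHeaderPhi(complete, 2u));
      assert(IsUndefinedLoopHeaderPhi(undefined, 2u));
      return 0;
    }
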
@@ -283,15 +309,7 @@
   }
 
   // 2) Set inputs of loop phis.
-  for (HBasicBlock* block : loop_headers_) {
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      for (HBasicBlock* predecessor : block->GetPredecessors()) {
-        HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
-        phi->AddInput(input);
-      }
-    }
-  }
+  SetLoopHeaderPhiInputs();
 
   // 3) Mark dead phis. This will mark phis that are only used by environments:
   // at the DEX level, the type of these phis does not need to be consistent, but
@@ -403,8 +421,13 @@
       for (size_t i = 0; i < vregs; ++i) {
         // No point in creating the catch phi if it is already undefined at
         // the first throwing instruction.
-        if ((*current_locals_)[i] != nullptr) {
-          HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
+        HInstruction* current_local_value = (*current_locals_)[i];
+        if (current_local_value != nullptr) {
+          HPhi* phi = new (arena) HPhi(
+              arena,
+              i,
+              0,
+              current_local_value->GetType());
           block->AddPhi(phi);
           (*locals)[i] = phi;
         }
@@ -451,7 +474,10 @@
       HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
       if (incoming != nullptr) {
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            0,
+            incoming->GetType());
         block->AddPhi(phi);
         (*current_locals_)[local] = phi;
       }
@@ -484,8 +510,12 @@
       }
 
       if (is_different) {
+        HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
         HPhi* phi = new (GetGraph()->GetArena()) HPhi(
-            GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
+            GetGraph()->GetArena(),
+            local,
+            block->GetPredecessors().size(),
+            first_input->GetType());
         for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
           HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
           phi->SetRawInputAt(i, pred_value);
@@ -583,8 +613,16 @@
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     return new_phi;
   } else {
-    DCHECK_EQ(next->GetType(), type);
-    return next->AsPhi();
+    HPhi* next_phi = next->AsPhi();
+    DCHECK_EQ(next_phi->GetType(), type);
+    if (next_phi->IsDead()) {
+      // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
+      // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
+      // cannot revive undefined loop header phis because they cannot have uses.
+      DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
+      next_phi->SetLive();
+    }
+    return next_phi;
   }
 }
 
@@ -638,7 +676,36 @@
 }
 
 void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
-  (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
+  uint32_t reg_number = store->GetLocal()->GetRegNumber();
+  HInstruction* stored_value = store->InputAt(1);
+  Primitive::Type stored_type = stored_value->GetType();
+  DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+  // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+  // registers. Consider the following cases:
+  // (1) Storing a wide value must overwrite previous values in both `reg_number`
+  //     and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+  // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+  //     must invalidate it. We store `nullptr` in `reg_number-1`.
+  // Consequently, storing a wide value into the high vreg of another wide value
+  // will invalidate both `reg_number-1` and `reg_number+1`.
+
+  if (reg_number != 0) {
+    HInstruction* local_low = (*current_locals_)[reg_number - 1];
+    if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+      // The vreg we are storing into was previously the high vreg of a pair.
+      // We need to invalidate its low vreg.
+      DCHECK((*current_locals_)[reg_number] == nullptr);
+      (*current_locals_)[reg_number - 1] = nullptr;
+    }
+  }
+
+  (*current_locals_)[reg_number] = stored_value;
+  if (Primitive::Is64BitType(stored_type)) {
+    // We are storing a pair. Invalidate the instruction in the high vreg.
+    (*current_locals_)[reg_number + 1] = nullptr;
+  }
+
   store->GetBlock()->RemoveInstruction(store);
 }
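
A standalone model of the invalidation rules spelled out in the comment above (hypothetical Value type; the real code stores one HInstruction* per vreg):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Value { bool is_wide; };

    static void StoreLocal(std::vector<const Value*>& locals, std::size_t reg, const Value* v) {
      // (2) Writing into the high half of an existing pair kills its low half.
      if (reg != 0u) {
        const Value* low = locals[reg - 1u];
        if (low != nullptr && low->is_wide) {
          locals[reg - 1u] = nullptr;
        }
      }
      locals[reg] = v;
      // (1) A wide store occupies reg and invalidates reg + 1.
      if (v->is_wide) {
        locals[reg + 1u] = nullptr;
      }
    }

    int main() {
      std::vector<const Value*> locals(4u, nullptr);
      Value wide = {true};
      Value narrow = {false};
      StoreLocal(locals, 0u, &wide);    // Pair occupies v0, clears v1.
      StoreLocal(locals, 1u, &narrow);  // Overwrites the pair's high half: kills v0.
      assert(locals[0] == nullptr && locals[1] == &narrow);
      return 0;
    }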
 
@@ -660,8 +727,7 @@
   if (instruction->CanThrowIntoCatchBlock()) {
     const HTryBoundary& try_entry =
         instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
-    for (HExceptionHandlerIterator it(try_entry); !it.Done(); it.Advance()) {
-      HBasicBlock* catch_block = it.Current();
+    for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
       ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
       DCHECK_EQ(handler_locals->size(), current_locals_->size());
       for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28..dcce5e4 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -81,6 +81,7 @@
   static constexpr const char* kSsaBuilderPassName = "ssa_builder";
 
  private:
+  void SetLoopHeaderPhiInputs();
   void FixNullConstantType();
   void EquivalentPhisCleanup();
 
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd..a3219dc 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,8 @@
 
 #include "ssa_phi_elimination.h"
 
+#include "base/arena_containers.h"
+
 namespace art {
 
 void SsaDeadPhiElimination::Run() {
@@ -24,22 +26,36 @@
 }
 
 void SsaDeadPhiElimination::MarkDeadPhis() {
+  // Phis are constructed live and should not be revived if previously marked
+  // dead. This algorithm temporarily breaks that invariant but we DCHECK that
+  // only phis which were initially live are revived.
+  ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+
   // Add to the worklist phis referenced by non-phi instructions.
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
-      // Set dead ahead of running through uses. The phi may have no use.
-      phi->SetDead();
+      if (phi->IsDead()) {
+        continue;
+      }
+
+      bool has_non_phi_use = false;
       for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
-        HUseListNode<HInstruction*>* current = use_it.Current();
-        HInstruction* user = current->GetUser();
-        if (!user->IsPhi()) {
-          worklist_.push_back(phi);
-          phi->SetLive();
+        if (!use_it.Current()->GetUser()->IsPhi()) {
+          has_non_phi_use = true;
           break;
         }
       }
+
+      if (has_non_phi_use) {
+        worklist_.push_back(phi);
+      } else {
+        phi->SetDead();
+        if (kIsDebugBuild) {
+          initially_live.insert(phi);
+        }
+      }
     }
   }
 
@@ -48,10 +64,13 @@
     HPhi* phi = worklist_.back();
     worklist_.pop_back();
     for (HInputIterator it(phi); !it.Done(); it.Advance()) {
-      HInstruction* input = it.Current();
-      if (input->IsPhi() && input->AsPhi()->IsDead()) {
-        worklist_.push_back(input->AsPhi());
-        input->AsPhi()->SetLive();
+      HPhi* input = it.Current()->AsPhi();
+      if (input != nullptr && input->IsDead()) {
+        // Input is a dead phi. Revive it and add to the worklist. We make sure
+        // that the phi was not dead initially (see definition of `initially_live`).
+        DCHECK(ContainsElement(initially_live, input));
+        input->SetLive();
+        worklist_.push_back(input);
       }
     }
   }
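
The reworked pass above is a standard worklist fixpoint: phis with at least one non-phi use seed the worklist, and liveness then propagates backwards through phi inputs. A standalone sketch under that reading (hypothetical graph type; the real pass first marks phis dead and then revives them, which is what the initially_live DCHECK polices):

    #include <cassert>
    #include <vector>

    struct Phi {
      bool live = false;
      bool has_non_phi_use = false;
      std::vector<Phi*> phi_inputs;
    };

    static void MarkLivePhis(const std::vector<Phi*>& phis) {
      std::vector<Phi*> worklist;
      for (Phi* phi : phis) {
        if (phi->has_non_phi_use) {
          phi->live = true;
          worklist.push_back(phi);
        }
      }
      while (!worklist.empty()) {
        Phi* phi = worklist.back();
        worklist.pop_back();
        for (Phi* input : phi->phi_inputs) {
          if (!input->live) {
            input->live = true;  // It feeds a live phi, so it is live too.
            worklist.push_back(input);
          }
        }
      }
    }

    int main() {
      Phi a, b, c;  // a <- b <- c; only a has a non-phi use.
      a.has_non_phi_use = true;
      a.phi_inputs = {&b};
      b.phi_inputs = {&c};
      MarkLivePhis({&a, &b, &c});
      assert(a.live && b.live && c.live);
      return 0;
    }
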
@@ -118,7 +137,6 @@
     }
 
     if (phi->InputCount() == 0) {
-      DCHECK(phi->IsCatchPhi());
       DCHECK(phi->IsDead());
       continue;
     }
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 4d30b83..8f6b6d8 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -45,6 +45,14 @@
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
+  virtual bool Flush() = 0;
+
  private:
   const std::string location_;
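
The contract documented above, that a stream may defer error reporting until Flush(), is what a buffering wrapper relies on. A small standalone sketch of the pattern (hypothetical Sink classes, not the ART OutputStream hierarchy):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    class Sink {
     public:
      virtual ~Sink() {}
      virtual bool Write(const void* buf, std::size_t n) = 0;
      virtual bool Flush() = 0;
    };

    // Buffers writes and always reports success for them; any pending error
    // from the underlying sink is surfaced by Flush().
    class BufferedSink : public Sink {
     public:
      explicit BufferedSink(Sink* out) : out_(out) {}
      bool Write(const void* buf, std::size_t n) override {
        const char* p = static_cast<const char*>(buf);
        buffer_.insert(buffer_.end(), p, p + n);
        return true;  // Error reporting is delayed.
      }
      bool Flush() override {
        bool ok = buffer_.empty() || out_->Write(buffer_.data(), buffer_.size());
        buffer_.clear();
        return ok && out_->Flush();  // Pending errors show up here.
      }
     private:
      Sink* out_;
      std::vector<char> buffer_;
    };

    class VectorSink : public Sink {
     public:
      bool Write(const void* buf, std::size_t n) override {
        const char* p = static_cast<const char*>(buf);
        data_.insert(data_.end(), p, p + n);
        return true;
      }
      bool Flush() override { return true; }
      std::vector<char> data_;
    };

    int main() {
      VectorSink out;
      BufferedSink buffered(&out);
      buffered.Write("abc", 3u);
      assert(out.data_.empty());   // Nothing reached the sink yet.
      assert(buffered.Flush());    // Data and any deferred error surface here.
      assert(out.data_.size() == 3u);
      return 0;
    }

The BufferedFlush test added below checks precisely this forwarding: the buffered stream's Flush() must reach the wrapped stream.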
 
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
index 6104ccd..84c76f2 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/output_stream_test.cc
@@ -19,6 +19,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -48,6 +49,7 @@
     EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
     CheckOffset(10);
     EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
   }
 
   void CheckTestOutput(const std::vector<uint8_t>& actual) {
@@ -77,9 +79,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -99,4 +99,39 @@
   CheckTestOutput(output);
 }
 
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 68e3956..dead8fd 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -342,9 +342,9 @@
       return IsAbsoluteUint<12>(offset);
     case kLoadSWord:
     case kLoadDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kLoadWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -360,9 +360,9 @@
       return IsAbsoluteUint<12>(offset);
     case kStoreSWord:
     case kStoreDWord:
-      return IsAbsoluteUint<10>(offset);  // VFP addressing mode.
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;  // VFP addressing mode.
     case kStoreWordPair:
-      return IsAbsoluteUint<10>(offset);
+      return IsAbsoluteUint<10>(offset) && (offset & 3) == 0;
     default:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 5233dcb..ce3a872 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -389,8 +389,6 @@
   void EmitBranch(Condition cond, Label* label, bool link);
   static int32_t EncodeBranchOffset(int offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
 };
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index fb3aa1e..cdeb443 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -282,6 +282,32 @@
   }
 }
 
+void Thumb2Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
+
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
   DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
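
The patching loop above splices "advance PC" opcodes back into the CFI stream at the recorded positions, with each PC remapped through GetAdjustedPosition(). A standalone model of the splice, with a trivial adjustment standing in for the real one:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Hypothetical model: opcode bytes are characters and an advance is spliced
    // in as a textual marker.
    struct DelayedAdvancePC { uint32_t stream_pos; uint32_t pc; };

    static std::string Patch(const std::string& old_stream,
                             const std::vector<DelayedAdvancePC>& advances,
                             uint32_t (*adjust)(uint32_t)) {
      std::string out;
      std::size_t stream_pos = 0u;
      for (const DelayedAdvancePC& advance : advances) {
        assert(advance.stream_pos >= stream_pos);
        // Copy old data up to the point where the advance was issued.
        out.append(old_stream, stream_pos, advance.stream_pos - stream_pos);
        stream_pos = advance.stream_pos;
        // Insert the advance command with its final offset.
        out += "[advance:" + std::to_string(adjust(advance.pc)) + "]";
      }
      out.append(old_stream, stream_pos, std::string::npos);  // Final segment.
      return out;
    }

    int main() {
      // Opcodes "A" and "B" with an advance recorded between them at pc 4;
      // assume later fixups moved the code by +2.
      std::vector<DelayedAdvancePC> advances = {{1u, 4u}};
      std::string patched = Patch("AB", advances, [](uint32_t pc) { return pc + 2u; });
      assert(patched == "A[advance:6]B");
      return 0;
    }
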
@@ -463,6 +489,7 @@
   EmitLiterals();
   FinalizeTrackedLabels();
   EmitJumpTables();
+  PatchCFI();
 }
 
 bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
@@ -1322,7 +1349,8 @@
   int32_t encoding = 0;
   if (so.IsImmediate()) {
     // Check special cases.
-    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) {
+    if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) &&
+        /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) {
       if (set_cc != kCcSet) {
         if (opcode == SUB) {
           thumb_opcode = 5U;
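
The `!ShifterOperandCanAlwaysHold(...)` term added above makes ADD/SUB prefer the T3 modified-immediate encoding and fall back to the T4 imm12 form (addw/subw) only when T3 cannot hold the value, which is why the SpecialAddSub expectations further below flip from addw/subw to add.w/sub.w for most immediates. A rough standalone approximation of the decision (only the shifted-8-bit subset of Thumb2 modified immediates; replicated byte patterns are omitted):

    #include <cassert>
    #include <cstdint>

    // Sound but incomplete stand-in for ShifterOperandCanAlwaysHold(): accepts
    // any value that is an 8-bit window at some shift.
    static bool RoughlyEncodableImm(uint32_t v) {
      if (v == 0u) return true;
      while ((v & 1u) == 0u) v >>= 1;  // Strip trailing zeros.
      return v < 0x100u;               // At most 8 significant bits remain.
    }

    int main() {
      assert(RoughlyEncodableImm(0xf00u));   // T3 wins: "add.w sp, sp, #3840".
      assert(!RoughlyEncodableImm(0xffcu));  // T4 fallback: "addw sp, sp, #4092".
      return 0;
    }
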
@@ -2541,30 +2569,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
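
With the 16-bit path gone, movw always emits the T3 encoding, so the bit-packing above fully determines the machine word. A standalone check of that packing against the updated MovWMovT expectation later in this patch:

    #include <cassert>
    #include <cstdint>

    // T3 MOVW packs imm16 as i:imm4:imm3:imm8 around the fixed opcode bits
    // B31|B30|B29|B28|B25|B22 = 0xf2400000. Register validity checks omitted.
    static uint32_t EncodeMovwT3(uint32_t rd, uint16_t imm16) {
      uint32_t imm4 = (imm16 >> 12) & 0xfu;
      uint32_t i = (imm16 >> 11) & 1u;
      uint32_t imm3 = (imm16 >> 8) & 7u;
      uint32_t imm8 = imm16 & 0xffu;
      return 0xf2400000u | (i << 26) | (imm4 << 16) | (imm3 << 12) | (rd << 8) | imm8;
    }

    int main() {
      // Matches the expected disassembly "f240 0434  movw r4, #52"
      // (halfwords printed high first).
      assert(EncodeMovwT3(4u, 0x34u) == 0xf2400434u);
      return 0;
    }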
 
 
@@ -3193,7 +3210,7 @@
 
 void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) {
   CheckCondition(cond);
-  EmitShift(rd, rm, RRX, rm, cond, set_cc);
+  EmitShift(rd, rm, RRX, 0, cond, set_cc);
 }
 
 
@@ -3442,6 +3459,73 @@
   }
 }
 
+int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) {
+  switch (type) {
+    case kLoadSignedByte:
+    case kLoadSignedHalfword:
+    case kLoadUnsignedHalfword:
+    case kLoadUnsignedByte:
+    case kLoadWord:
+      // We can encode imm12 offset.
+      return 0xfffu;
+    case kLoadSWord:
+    case kLoadDWord:
+    case kLoadWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) {
+  switch (type) {
+    case kStoreHalfword:
+    case kStoreByte:
+    case kStoreWord:
+      // We can encode imm12 offset.
+      return 0xfff;
+    case kStoreSWord:
+    case kStoreDWord:
+    case kStoreWordPair:
+      // We can encode imm8:'00' offset.
+      return 0xff << 2;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+  }
+}
+
+bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                                              int32_t offset,
+                                              /*out*/ int32_t* add_to_base,
+                                              /*out*/ int32_t* offset_for_load_store) {
+  int32_t other_bits = offset & ~allowed_offset_bits;
+  if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) {
+    *add_to_base = offset & ~allowed_offset_bits;
+    *offset_for_load_store = offset & allowed_offset_bits;
+    return true;
+  }
+  return false;
+}
+
+int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                               Register temp,
+                                               Register base,
+                                               int32_t offset,
+                                               Condition cond) {
+  DCHECK_NE(offset & ~allowed_offset_bits, 0);
+  int32_t add_to_base, offset_for_load;
+  if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+    AddConstant(temp, base, add_to_base, cond, kCcKeep);
+    return offset_for_load;
+  } else {
+    LoadImmediate(temp, offset, cond);
+    add(temp, temp, ShifterOperand(base), cond, kCcKeep);
+    return 0;
+  }
+}
 
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb.
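
The split performed by CanSplitLoadStoreOffset() keeps the low bits that the memory instruction itself can encode and checks that the remainder is a legal ADD immediate (the real test is ShifterOperandCanAlwaysHold() on both signs of the remainder). A standalone sketch, reusing the rough immediate approximation from the SpecialAddSub note above:

    #include <cassert>
    #include <cstdint>

    static bool RoughlyEncodableImm(uint32_t v) {
      if (v == 0u) return true;
      while ((v & 1u) == 0u) v >>= 1;
      return v < 0x100u;
    }

    static bool SplitOffset(int32_t allowed_bits, int32_t offset,
                            int32_t* add_to_base, int32_t* low) {
      int32_t other_bits = offset & ~allowed_bits;
      if (!RoughlyEncodableImm(static_cast<uint32_t>(other_bits))) {
        return false;  // The remainder needs a full LoadImmediate instead.
      }
      *add_to_base = other_bits;
      *low = offset & allowed_bits;
      return true;
    }

    int main() {
      int32_t add = 0;
      int32_t low = 0;
      // LDR word allows imm12: 0x1000a4 splits into ADD #0x100000 + LDR #0xa4,
      // exactly the "add.w r2, r4, #1048576; ldr.w r2, [r2, #164]" sequence in
      // the LoadFromOffset expectations below.
      assert(SplitOffset(0xfff, 0x1000a4, &add, &low));
      assert(add == 0x100000 && low == 0xa4);
      return 0;
    }

When the remainder is not encodable, AdjustLoadStoreOffset() falls back to LoadImmediate plus ADD, which is the movw/movt/add sequence visible in the expectations.
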
@@ -3452,12 +3536,26 @@
                                      Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+    // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks.
+    int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type);
+    DCHECK_NE(offset & ~allowed_offset_bits, 0);
+    int32_t add_to_base, offset_for_load;
+    if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) {
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      AddConstant(reg, base, add_to_base, cond, kCcKeep);
+      base = reg;
+      offset = offset_for_load;
+    } else {
+      Register temp = (reg == base) ? IP : reg;
+      LoadImmediate(temp, offset, cond);
+      // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD.
+      // Use reg for the adjusted base. If it's a low reg, we may end up using a 16-bit load.
+      add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep);
+      base = reg;
+      offset = 0;
+    }
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(type, offset));
   switch (type) {
     case kLoadSignedByte:
       ldrsb(reg, Address(base, offset), cond);
@@ -3483,7 +3581,6 @@
   }
 }
 
-
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset.
 void Thumb2Assembler::LoadSFromOffset(SRegister reg,
@@ -3492,12 +3589,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
   vldrs(reg, Address(base, offset), cond);
 }
 
@@ -3510,12 +3605,10 @@
                                       Condition cond) {
   if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
   vldrd(reg, Address(base, offset), cond);
 }
 
@@ -3546,12 +3639,12 @@
         offset += kRegisterSize;
       }
     }
-    LoadImmediate(tmp_reg, offset, cond);
-    add(tmp_reg, tmp_reg, ShifterOperand(base), AL);
+    // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset()
+    // and in the "unsplittable" path get rid of the "add" by using the indexed store instead.
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond);
     base = tmp_reg;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(type, offset));
   switch (type) {
     case kStoreByte:
       strb(reg, Address(base, offset), cond);
@@ -3584,12 +3677,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
   vstrs(reg, Address(base, offset), cond);
 }
 
@@ -3602,12 +3693,10 @@
                                      Condition cond) {
   if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) {
     CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
+    offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond);
     base = IP;
-    offset = 0;
   }
-  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
   vstrd(reg, Address(base, offset), cond);
 }
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 38fd244..9aeece8 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -44,6 +44,7 @@
         last_position_adjustment_(0u),
         last_old_position_(0u),
         last_fixup_id_(0u) {
+    cfi().DelayEmittingAdvancePCs();
   }
 
   virtual ~Thumb2Assembler() {
@@ -728,13 +729,23 @@
   void EmitBranch(Condition cond, Label* label, bool link, bool x);
   static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
   static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
   void EmitShift(Register rd, Register rm, Shift shift, uint8_t amount,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
   void EmitShift(Register rd, Register rn, Shift shift, Register rm,
                  Condition cond = AL, SetCc set_cc = kCcDontCare);
 
+  static int32_t GetAllowedLoadOffsetBits(LoadOperandType type);
+  static int32_t GetAllowedStoreOffsetBits(StoreOperandType type);
+  bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
+                               int32_t offset,
+                               /*out*/ int32_t* add_to_base,
+                               /*out*/ int32_t* offset_for_load_store);
+  int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits,
+                                Register temp,
+                                Register base,
+                                int32_t offset,
+                                Condition cond);
+
   // Whether the assembler can relocate branches. If false, unresolved branches will be
   // emitted on 32bits.
   bool can_relocate_branches_;
@@ -792,6 +803,7 @@
   void EmitFixups(uint32_t adjusted_code_size);
   void EmitLiterals();
   void EmitJumpTables();
+  void PatchCFI();
 
   static int16_t BEncoding16(int32_t offset, Condition cond);
   static int32_t BEncoding32(int32_t offset, Condition cond);
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index cb4b20b..7b32b0f 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -243,7 +243,7 @@
 
   const char* expected =
       "subs r1, r0, #42\n"
-      "subw r1, r0, #42\n"
+      "sub.w r1, r0, #42\n"
       "subs r1, r0, r2, asr #31\n"
       "sub r1, r0, r2, asr #31\n";
   DriverStr(expected, "sub");
@@ -257,7 +257,7 @@
 
   const char* expected =
       "adds r1, r0, #42\n"
-      "addw r1, r0, #42\n"
+      "add.w r1, r0, #42\n"
       "adds r1, r0, r2, asr #31\n"
       "add r1, r0, r2, asr #31\n";
   DriverStr(expected, "add");
@@ -305,21 +305,18 @@
   __ StoreToOffset(type, arm::IP, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #4096\n"   // AddConstant(ip, sp, 4096)
       "str r0, [ip, #0]\n"
 
-      "str r5, [sp, #-4]!\n"  // Push(r5)
-      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "str ip, [r5, #0]\n"
-      "ldr r5, [sp], #4\n"    // Pop(r5)
+      "str r5, [sp, #-4]!\n"    // Push(r5)
+      "add.w r5, sp, #4096\n"   // AddConstant(r5, sp, 4100 & ~0xfff)
+      "str ip, [r5, #4]\n"      // StoreToOffset(type, ip, r5, 4100 & 0xfff)
+      "ldr r5, [sp], #4\n"      // Pop(r5)
 
-      "str r6, [sp, #-4]!\n"  // Push(r6)
-      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
-      "add r6, r6, r5\n"
-      "str ip, [r6, #0]\n"
-      "ldr r6, [sp], #4\n";   // Pop(r6)
+      "str r6, [sp, #-4]!\n"    // Push(r6)
+      "add.w r6, r5, #4096\n"   // AddConstant(r6, r5, 4096 & ~0xfff)
+      "str ip, [r6, #0]\n"      // StoreToOffset(type, ip, r6, 4096 & 0xfff)
+      "ldr r6, [sp], #4\n";     // Pop(r6)
   DriverStr(expected, "StoreWordToNonThumbOffset");
 }
 
@@ -360,20 +357,17 @@
   __ StoreToOffset(type, arm::R11, arm::R5, offset);
 
   const char* expected =
-      "mov ip, #1024\n"           // LoadImmediate(ip, 1024)
-      "add ip, ip, sp\n"
+      "add.w ip, sp, #1024\n"     // AddConstant(ip, sp, 1024)
       "strd r0, r1, [ip, #0]\n"
 
       "str r5, [sp, #-4]!\n"      // Push(r5)
-      "movw r5, #1028\n"          // LoadImmediate(r5, 1024 + kRegisterSize)
-      "add r5, r5, sp\n"
-      "strd r11, ip, [r5, #0]\n"
+      "add.w r5, sp, #1024\n"     // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc)
+      "strd r11, ip, [r5, #4]\n"  // StoreToOffset(type, r11, r5, (1024 + kRegisterSize) & 0x3fc)
       "ldr r5, [sp], #4\n"        // Pop(r5)
 
       "str r6, [sp, #-4]!\n"      // Push(r6)
-      "mov r6, #1024\n"           // LoadImmediate(r6, 1024)
-      "add r6, r6, r5\n"
-      "strd r11, ip, [r6, #0]\n"
+      "add.w r6, r5, #1024\n"     // AddConstant(r6, r5, 1024 & ~0x3fc)
+      "strd r11, ip, [r6, #0]\n"  // StoreToOffset(type, r11, r6, 1024 & 0x3fc)
       "ldr r6, [sp], #4\n";       // Pop(r6)
   DriverStr(expected, "StoreWordPairToNonThumbOffset");
 }
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index b01b0fe..f784d2c 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -38,6 +38,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
 #include "x86_64/assembler_x86_64.h"
 #endif
+#include "base/casts.h"
 #include "globals.h"
 #include "memory_region.h"
 
@@ -119,7 +120,13 @@
 }
 
 void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
-  this->AdvancePC(assembler_->CodeSize());
+  uint32_t pc = dchecked_integral_cast<uint32_t>(assembler_->CodeSize());
+  if (delay_emitting_advance_pc_) {
+    uint32_t stream_pos = dchecked_integral_cast<uint32_t>(opcodes_.size());
+    delayed_advance_pcs_.push_back(DelayedAdvancePC {stream_pos, pc});
+  } else {
+    AdvancePC(pc);
+  }
 }
 
 Assembler* Assembler::Create(InstructionSet instruction_set,
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index dfe6bab..1dbc142 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -271,16 +271,71 @@
 class DebugFrameOpCodeWriterForAssembler FINAL
     : public dwarf::DebugFrameOpCodeWriter<> {
  public:
+  struct DelayedAdvancePC {
+    uint32_t stream_pos;
+    uint32_t pc;
+  };
+
   // This method is called by the opcode writers.
   virtual void ImplicitlyAdvancePC() FINAL;
 
   explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer)
-      : dwarf::DebugFrameOpCodeWriter<>(),
-        assembler_(buffer) {
+      : dwarf::DebugFrameOpCodeWriter<>(false /* enabled */),
+        assembler_(buffer),
+        delay_emitting_advance_pc_(false),
+        delayed_advance_pcs_() {
+  }
+
+  ~DebugFrameOpCodeWriterForAssembler() {
+    DCHECK(delayed_advance_pcs_.empty());
+  }
+
+  // Tell the writer to delay emitting advance PC info.
+  // The assembler must explicitly process all the delayed advances.
+  void DelayEmittingAdvancePCs() {
+    delay_emitting_advance_pc_ = true;
+  }
+
+  // Override the last delayed PC. The new PC can be out of order.
+  void OverrideDelayedPC(size_t pc) {
+    DCHECK(delay_emitting_advance_pc_);
+    DCHECK(!delayed_advance_pcs_.empty());
+    delayed_advance_pcs_.back().pc = pc;
+  }
+
+  // Return the number of delayed advance PC entries.
+  size_t NumberOfDelayedAdvancePCs() const {
+    return delayed_advance_pcs_.size();
+  }
+
+  // Release the CFI stream and the advance PC entries so that the assembler can patch them.
+  std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>>
+  ReleaseStreamAndPrepareForDelayedAdvancePC() {
+    DCHECK(delay_emitting_advance_pc_);
+    delay_emitting_advance_pc_ = false;
+    std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>> result;
+    result.first.swap(opcodes_);
+    result.second.swap(delayed_advance_pcs_);
+    return result;
+  }
+
+  // Reserve space for the CFI stream.
+  void ReserveCFIStream(size_t capacity) {
+    opcodes_.reserve(capacity);
+  }
+
+  // Append raw data to the CFI stream.
+  void AppendRawData(const std::vector<uint8_t>& raw_data, size_t first, size_t last) {
+    DCHECK_LE(0u, first);
+    DCHECK_LE(first, last);
+    DCHECK_LE(last, raw_data.size());
+    opcodes_.insert(opcodes_.end(), raw_data.begin() + first, raw_data.begin() + last);
   }
 
  private:
   Assembler* assembler_;
+  bool delay_emitting_advance_pc_;
+  std::vector<DelayedAdvancePC> delayed_advance_pcs_;
 };
 
 class Assembler {
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index f1233ca..9457da1 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -840,12 +840,17 @@
     return str;
   }
 
+  // Override this to pad the code with NOPs to a certain size if needed.
+  virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
+  }
+
   void DriverWrapper(std::string assembly_text, std::string test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
     MemoryRegion code(&(*data)[0], data->size());
     assembler_->FinalizeInstructions(code);
+    Pad(*data);
     test_helper_->Driver(*data, assembly_text, test_name);
   }
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 2ae8841..5ae2cc2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -466,6 +466,38 @@
   EmitAndCheck(&assembler, "DataProcessingShiftedRegister");
 }
 
+TEST(Thumb2AssemblerTest, ShiftImmediate) {
+  // Note: This test produces the same results as DataProcessingShiftedRegister
+  // but it does so using shift functions instead of mov().
+  arm::Thumb2Assembler assembler;
+
+  // 16-bit variants.
+  __ Lsl(R3, R4, 4);
+  __ Lsr(R3, R4, 5);
+  __ Asr(R3, R4, 6);
+
+  // 32-bit ROR because ROR with an immediate has no 16-bit encoding, unlike the other shifts.
+  __ Ror(R3, R4, 7);
+
+  // 32-bit RRX because RRX has no 16-bit version.
+  __ Rrx(R3, R4);
+
+  // 32 bit variants (not setting condition codes).
+  __ Lsl(R3, R4, 4, AL, kCcKeep);
+  __ Lsr(R3, R4, 5, AL, kCcKeep);
+  __ Asr(R3, R4, 6, AL, kCcKeep);
+  __ Ror(R3, R4, 7, AL, kCcKeep);
+  __ Rrx(R3, R4, AL, kCcKeep);
+
+  // 32 bit variants (high registers).
+  __ Lsls(R8, R4, 4);
+  __ Lsrs(R8, R4, 5);
+  __ Asrs(R8, R4, 6);
+  __ Rors(R8, R4, 7);
+  __ Rrxs(R8, R4);
+
+  EmitAndCheck(&assembler, "ShiftImmediate");
+}
 
 TEST(Thumb2AssemblerTest, BasicLoad) {
   arm::Thumb2Assembler assembler;
@@ -800,11 +832,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
@@ -823,29 +856,80 @@
 
   __ add(R2, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
   __ add(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size.
+  __ add(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
-  __ sub(SP, SP, ShifterOperand(0x50));     // 16 bit
-  __ sub(R0, SP, ShifterOperand(0x50));     // 32 bit
-  __ sub(R8, SP, ShifterOperand(0x50));     // 32 bit.
+  __ sub(SP, SP, ShifterOperand(0x50));   // 16 bit
+  __ sub(R0, SP, ShifterOperand(0x50));   // 32 bit
+  __ sub(R8, SP, ShifterOperand(0x50));   // 32 bit.
 
-  __ sub(SP, SP, ShifterOperand(0xf00));   // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xf00));  // 32 bit due to imm size
+  __ sub(SP, SP, ShifterOperand(0xffc));  // 32 bit due to imm size; encoding T4.
 
   EmitAndCheck(&assembler, "SpecialAddSub");
 }
 
+TEST(Thumb2AssemblerTest, LoadFromOffset) {
+  arm::Thumb2Assembler assembler;
+
+  __ LoadFromOffset(kLoadWord, R2, R4, 12);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadWord, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadWord, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000);
+  __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 12);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4);
+  __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400);
+  __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400);
+
+  __ LoadFromOffset(kLoadWord, R0, R12, 12);  // 32-bit because of R12.
+  __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000);
+
+  __ LoadFromOffset(kLoadSignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12);
+  __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12);
+
+  EmitAndCheck(&assembler, "LoadFromOffset");
+}
+
 TEST(Thumb2AssemblerTest, StoreToOffset) {
   arm::Thumb2Assembler assembler;
 
-  __ StoreToOffset(kStoreWord, R2, R4, 12);     // Simple
-  __ StoreToOffset(kStoreWord, R2, R4, 0x2000);     // Offset too big.
-  __ StoreToOffset(kStoreWord, R0, R12, 12);
-  __ StoreToOffset(kStoreHalfword, R0, R12, 12);
-  __ StoreToOffset(kStoreByte, R2, R12, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 12);
+  __ StoreToOffset(kStoreWord, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreWord, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreWord, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 12);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4);
+  __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000);
+  __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 12);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4);
+  __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400);
+  __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400);
+
+  __ StoreToOffset(kStoreWord, R0, R12, 12);  // 32-bit because of R12.
+  __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000);
+
+  __ StoreToOffset(kStoreByte, R2, R4, 12);
 
   EmitAndCheck(&assembler, "StoreToOffset");
 }
 
-
 TEST(Thumb2AssemblerTest, IfThen) {
   arm::Thumb2Assembler assembler;
 
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index b79c2e4..886295e 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -132,8 +132,8 @@
 const char* DataProcessingImmediateResults[] = {
   "   0:	2055      	movs	r0, #85	; 0x55\n",
   "   2:	f06f 0055 	mvn.w	r0, #85	; 0x55\n",
-  "   6:	f201 0055 	addw	r0, r1, #85	; 0x55\n",
-  "   a:	f2a1 0055 	subw	r0, r1, #85	; 0x55\n",
+  "   6:	f101 0055 	add.w	r0, r1, #85	; 0x55\n",
+  "   a:	f1a1 0055 	sub.w	r0, r1, #85	; 0x55\n",
   "   e:	f001 0055 	and.w	r0, r1, #85	; 0x55\n",
   "  12:	f041 0055 	orr.w	r0, r1, #85	; 0x55\n",
   "  16:	f061 0055 	orn	r0, r1, #85	; 0x55\n",
@@ -201,6 +201,24 @@
   "  32:	ea5f 0834 	movs.w	r8, r4, rrx\n",
   nullptr
 };
+const char* ShiftImmediateResults[] = {
+  "   0:  0123        lsls  r3, r4, #4\n",
+  "   2:  0963        lsrs  r3, r4, #5\n",
+  "   4:  11a3        asrs  r3, r4, #6\n",
+  "   6:  ea4f 13f4   mov.w  r3, r4, ror #7\n",
+  "   a:  ea4f 0334   mov.w  r3, r4, rrx\n",
+  "   e:  ea4f 1304   mov.w r3, r4, lsl #4\n",
+  "  12:  ea4f 1354   mov.w r3, r4, lsr #5\n",
+  "  16:  ea4f 13a4   mov.w r3, r4, asr #6\n",
+  "  1a:  ea4f 13f4   mov.w r3, r4, ror #7\n",
+  "  1e:  ea4f 0334   mov.w r3, r4, rrx\n",
+  "  22:  ea5f 1804   movs.w  r8, r4, lsl #4\n",
+  "  26:  ea5f 1854   movs.w  r8, r4, lsr #5\n",
+  "  2a:  ea5f 18a4   movs.w  r8, r4, asr #6\n",
+  "  2e:  ea5f 18f4   movs.w  r8, r4, ror #7\n",
+  "  32:  ea5f 0834   movs.w  r8, r4, rrx\n",
+  nullptr
+};
 const char* BasicLoadResults[] = {
   "   0:	69a3      	ldr	r3, [r4, #24]\n",
   "   2:	7e23      	ldrb	r3, [r4, #24]\n",
@@ -421,36 +439,128 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
   "   0:	aa14      	add	r2, sp, #80	; 0x50\n",
   "   2:	b014      	add	sp, #80		; 0x50\n",
-  "   4:	f20d 0850 	addw	r8, sp, #80	; 0x50\n",
-  "   8:	f60d 7200 	addw	r2, sp, #3840	; 0xf00\n",
-  "   c:	f60d 7d00 	addw	sp, sp, #3840	; 0xf00\n",
-  "  10:	b094      	sub	sp, #80		; 0x50\n",
-  "  12:	f2ad 0050 	subw	r0, sp, #80	; 0x50\n",
-  "  16:	f2ad 0850 	subw	r8, sp, #80	; 0x50\n",
-  "  1a:	f6ad 7d00 	subw	sp, sp, #3840	; 0xf00\n",
+  "   4:	f10d 0850 	add.w	r8, sp, #80	; 0x50\n",
+  "   8:	f50d 6270 	add.w	r2, sp, #3840	; 0xf00\n",
+  "   c:	f50d 6d70 	add.w	sp, sp, #3840	; 0xf00\n",
+  "  10:	f60d 7dfc 	addw	sp, sp, #4092	; 0xffc\n",
+  "  14:	b094      	sub	sp, #80		; 0x50\n",
+  "  16:	f1ad 0050 	sub.w	r0, sp, #80	; 0x50\n",
+  "  1a:	f1ad 0850 	sub.w	r8, sp, #80	; 0x50\n",
+  "  1e:	f5ad 6d70 	sub.w	sp, sp, #3840	; 0xf00\n",
+  "  22:	f6ad 7dfc 	subw	sp, sp, #4092	; 0xffc\n",
+  nullptr
+};
+const char* LoadFromOffsetResults[] = {
+  "   0:	68e2      	ldr	r2, [r4, #12]\n",
+  "   2:	f8d4 2fff 	ldr.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "   a:	6812      	ldr	r2, [r2, #0]\n",
+  "   c:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  10:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  14:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  18:	f2c0 0210 	movt	r2, #16\n",
+  "  1c:	4422      	add	r2, r4\n",
+  "  1e:	6812      	ldr	r2, [r2, #0]\n",
+  "  20:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  24:	f2c0 0c10 	movt	ip, #16\n",
+  "  28:	4464      	add	r4, ip\n",
+  "  2a:	6824      	ldr	r4, [r4, #0]\n",
+  "  2c:	89a2      	ldrh	r2, [r4, #12]\n",
+  "  2e:	f8b4 2fff 	ldrh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  32:	f504 5280 	add.w	r2, r4, #4096	; 0x1000\n",
+  "  36:	8812      	ldrh	r2, [r2, #0]\n",
+  "  38:	f504 1280 	add.w	r2, r4, #1048576	; 0x100000\n",
+  "  3c:	f8b2 20a4 	ldrh.w	r2, [r2, #164]	; 0xa4\n",
+  "  40:	f241 0200 	movw	r2, #4096	; 0x1000\n",
+  "  44:	f2c0 0210 	movt	r2, #16\n",
+  "  48:	4422      	add	r2, r4\n",
+  "  4a:	8812      	ldrh	r2, [r2, #0]\n",
+  "  4c:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  50:	f2c0 0c10 	movt	ip, #16\n",
+  "  54:	4464      	add	r4, ip\n",
+  "  56:	8824      	ldrh	r4, [r4, #0]\n",
+  "  58:	e9d4 2303 	ldrd	r2, r3, [r4, #12]\n",
+  "  5c:	e9d4 23ff 	ldrd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  60:	f504 6280 	add.w	r2, r4, #1024	; 0x400\n",
+  "  64:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  68:	f504 2280 	add.w	r2, r4, #262144	; 0x40000\n",
+  "  6c:	e9d2 2329 	ldrd	r2, r3, [r2, #164]	; 0xa4\n",
+  "  70:	f240 4200 	movw	r2, #1024	; 0x400\n",
+  "  74:	f2c0 0204 	movt	r2, #4\n",
+  "  78:	4422      	add	r2, r4\n",
+  "  7a:	e9d2 2300 	ldrd	r2, r3, [r2]\n",
+  "  7e:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  82:	f2c0 0c04 	movt	ip, #4\n",
+  "  86:	4464      	add	r4, ip\n",
+  "  88:	e9d4 4500 	ldrd	r4, r5, [r4]\n",
+  "  8c:	f8dc 000c 	ldr.w	r0, [ip, #12]\n",
+  "  90:	f5a4 1280 	sub.w	r2, r4, #1048576	; 0x100000\n",
+  "  94:	f8d2 20a4 	ldr.w	r2, [r2, #164]	; 0xa4\n",
+  "  98:	f994 200c 	ldrsb.w	r2, [r4, #12]\n",
+  "  9c:	7b22      	ldrb	r2, [r4, #12]\n",
+  "  9e:	f9b4 200c 	ldrsh.w	r2, [r4, #12]\n",
   nullptr
 };
 const char* StoreToOffsetResults[] = {
   "   0:	60e2      	str	r2, [r4, #12]\n",
-  "   2:	f44f 5c00 	mov.w	ip, #8192	; 0x2000\n",
-  "   6:	44a4      	add	ip, r4\n",
-  "   8:	f8cc 2000 	str.w	r2, [ip]\n",
-  "   c:	f8cc 000c 	str.w	r0, [ip, #12]\n",
-  "   10:	f8ac 000c 	strh.w	r0, [ip, #12]\n",
-  "   14:	f88c 200c 	strb.w	r2, [ip, #12]\n",
+  "   2:	f8c4 2fff 	str.w	r2, [r4, #4095]	; 0xfff\n",
+  "   6:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "   a:	f8cc 2000 	str.w	r2, [ip]\n",
+  "   e:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  12:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  16:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  1a:	f2c0 0c10 	movt	ip, #16\n",
+  "  1e:	44a4      	add	ip, r4\n",
+  "  20:	f8cc 2000 	str.w	r2, [ip]\n",
+  "  24:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  28:	f2c0 0c10 	movt	ip, #16\n",
+  "  2c:	44a4      	add	ip, r4\n",
+  "  2e:	f8cc 4000 	str.w	r4, [ip]\n",
+  "  32:	81a2      	strh	r2, [r4, #12]\n",
+  "  34:	f8a4 2fff 	strh.w	r2, [r4, #4095]	; 0xfff\n",
+  "  38:	f504 5c80 	add.w	ip, r4, #4096	; 0x1000\n",
+  "  3c:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  40:	f504 1c80 	add.w	ip, r4, #1048576	; 0x100000\n",
+  "  44:	f8ac 20a4 	strh.w	r2, [ip, #164]	; 0xa4\n",
+  "  48:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  4c:	f2c0 0c10 	movt	ip, #16\n",
+  "  50:	44a4      	add	ip, r4\n",
+  "  52:	f8ac 2000 	strh.w	r2, [ip]\n",
+  "  56:	f241 0c00 	movw	ip, #4096	; 0x1000\n",
+  "  5a:	f2c0 0c10 	movt	ip, #16\n",
+  "  5e:	44a4      	add	ip, r4\n",
+  "  60:	f8ac 4000 	strh.w	r4, [ip]\n",
+  "  64:	e9c4 2303 	strd	r2, r3, [r4, #12]\n",
+  "  68:	e9c4 23ff 	strd	r2, r3, [r4, #1020]	; 0x3fc\n",
+  "  6c:	f504 6c80 	add.w	ip, r4, #1024	; 0x400\n",
+  "  70:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  74:	f504 2c80 	add.w	ip, r4, #262144	; 0x40000\n",
+  "  78:	e9cc 2329 	strd	r2, r3, [ip, #164]	; 0xa4\n",
+  "  7c:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  80:	f2c0 0c04 	movt	ip, #4\n",
+  "  84:	44a4      	add	ip, r4\n",
+  "  86:	e9cc 2300 	strd	r2, r3, [ip]\n",
+  "  8a:	f240 4c00 	movw	ip, #1024	; 0x400\n",
+  "  8e:	f2c0 0c04 	movt	ip, #4\n",
+  "  92:	44a4      	add	ip, r4\n",
+  "  94:	e9cc 4500 	strd	r4, r5, [ip]\n",
+  "  98:	f8cc 000c 	str.w	r0, [ip, #12]\n",
+  "  9c:	f5a4 1c80 	sub.w	ip, r4, #1048576	; 0x100000\n",
+  "  a0:	f8cc 20a4 	str.w	r2, [ip, #164]	; 0xa4\n",
+  "  a4:	7322      	strb	r2, [r4, #12]\n",
   nullptr
 };
 const char* IfThenResults[] = {
@@ -4952,6 +5062,7 @@
     test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults;
     test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults;
     test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults;
+    test_results["ShiftImmediate"] = ShiftImmediateResults;
     test_results["BasicLoad"] = BasicLoadResults;
     test_results["BasicStore"] = BasicStoreResults;
     test_results["ComplexLoad"] = ComplexLoadResults;
@@ -4966,6 +5077,7 @@
     test_results["StoreMultiple"] = StoreMultipleResults;
     test_results["MovWMovT"] = MovWMovTResults;
     test_results["SpecialAddSub"] = SpecialAddSubResults;
+    test_results["LoadFromOffset"] = LoadFromOffsetResults;
     test_results["StoreToOffset"] = StoreToOffsetResults;
     test_results["IfThen"] = IfThenResults;
     test_results["CbzCbnz"] = CbzCbnzResults;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 6f35e9e..fc7ac70 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -43,8 +43,60 @@
 }
 
 void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs();
   EmitBranches();
   Assembler::FinalizeInstructions(region);
+  PatchCFI(number_of_delayed_adjust_pcs);
+}
+
+void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    DCHECK_EQ(number_of_delayed_adjust_pcs, 0u);
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // PCs recorded before EmitBranches() need to be adjusted.
+  // PCs recorded during EmitBranches() are already adjusted.
+  // Both ranges are separately sorted but they may overlap.
+  if (kIsDebugBuild) {
+    auto cmp = [](const DelayedAdvancePC& lhs, const DelayedAdvancePC& rhs) {
+      return lhs.pc < rhs.pc;
+    };
+    CHECK(std::is_sorted(advances.begin(), advances.begin() + number_of_delayed_adjust_pcs, cmp));
+    CHECK(std::is_sorted(advances.begin() + number_of_delayed_adjust_pcs, advances.end(), cmp));
+  }
+
+  // Append initial CFI data if any.
+  size_t size = advances.size();
+  DCHECK_NE(size, 0u);
+  cfi().AppendRawData(old_stream, 0u, advances[0].stream_pos);
+  // Emit PC adjustments interleaved with the old CFI stream.
+  size_t adjust_pos = 0u;
+  size_t late_emit_pos = number_of_delayed_adjust_pcs;
+  while (adjust_pos != number_of_delayed_adjust_pcs || late_emit_pos != size) {
+    size_t adjusted_pc = (adjust_pos != number_of_delayed_adjust_pcs)
+        ? GetAdjustedPosition(advances[adjust_pos].pc)
+        : static_cast<size_t>(-1);
+    size_t late_emit_pc = (late_emit_pos != size)
+        ? advances[late_emit_pos].pc
+        : static_cast<size_t>(-1);
+    size_t advance_pc = std::min(adjusted_pc, late_emit_pc);
+    DCHECK_NE(advance_pc, static_cast<size_t>(-1));
+    size_t entry = (adjusted_pc <= late_emit_pc) ? adjust_pos : late_emit_pos;
+    if (adjusted_pc <= late_emit_pc) {
+      ++adjust_pos;
+    } else {
+      ++late_emit_pos;
+    }
+    cfi().AdvancePC(advance_pc);
+    size_t end_pos = (entry + 1u == size) ? old_stream.size() : advances[entry + 1u].stream_pos;
+    cfi().AppendRawData(old_stream, advances[entry].stream_pos, end_pos);
+  }
 }
 
 void MipsAssembler::EmitBranches() {
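
Unlike the Thumb2 variant, the MIPS patching above must interleave two separately sorted runs of advances: entries recorded before EmitBranches(), whose PCs still need GetAdjustedPosition(), and entries recorded during EmitBranches(), which are already final. A standalone model of that two-way merge (the +100 adjustment is a stand-in for the real remapping):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    int main() {
      std::vector<uint32_t> early = {4u, 8u};     // Recorded before EmitBranches().
      std::vector<uint32_t> late = {106u, 120u};  // Recorded during EmitBranches().
      std::vector<uint32_t> merged;
      std::size_t i = 0u;
      std::size_t j = 0u;
      while (i != early.size() || j != late.size()) {
        uint32_t a = (i != early.size()) ? early[i] + 100u : UINT32_MAX;  // Adjust.
        uint32_t b = (j != late.size()) ? late[j] : UINT32_MAX;           // Final.
        if (a <= b) {
          merged.push_back(a);
          ++i;
        } else {
          merged.push_back(b);
          ++j;
        }
      }
      // The merge yields one monotonically advancing PC sequence, as the CFI
      // stream requires.
      assert(std::is_sorted(merged.begin(), merged.end()));
      assert(merged == (std::vector<uint32_t>{104u, 106u, 108u, 120u}));
      return 0;
    }
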
@@ -258,15 +310,27 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20);
 }
 
+void MipsAssembler::Wsbh(Register rd, Register rt) {
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
 }
 
 void MipsAssembler::Srl(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02);
 }
 
+void MipsAssembler::Rotr(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
+  EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02);
+}
+
 void MipsAssembler::Sra(Register rd, Register rt, int shamt) {
+  CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03);
 }
 
@@ -1770,6 +1834,7 @@
                                const std::vector<ManagedRegister>& callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1811,6 +1876,7 @@
 void MipsAssembler::RemoveFrame(size_t frame_size,
                                 const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address.
@@ -1840,12 +1906,18 @@
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, -adjust);
   cfi_.AdjustCFAOffset(adjust);
+  if (overwriting_) {
+    cfi_.OverrideDelayedPC(overwrite_location_);
+  }
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, adjust);
   cfi_.AdjustCFAOffset(-adjust);
+  if (overwriting_) {
+    cfi_.OverrideDelayedPC(overwrite_location_);
+  }
 }
 
 void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index aa187b8..1ef0992 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -94,7 +94,9 @@
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
-        isa_features_(instruction_set_features) {}
+        isa_features_(instruction_set_features) {
+    cfi().DelayEmittingAdvancePCs();
+  }
 
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
@@ -133,9 +135,11 @@
 
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
+  void Wsbh(Register rd, Register rt);  // R2+
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
+  void Rotr(Register rd, Register rt, int shamt);  // R2+
   void Sra(Register rd, Register rt, int shamt);
   void Sllv(Register rd, Register rt, Register rs);
   void Srlv(Register rd, Register rt, Register rs);
@@ -599,6 +603,7 @@
   void PromoteBranches();
   void EmitBranch(Branch* branch);
   void EmitBranches();
+  void PatchCFI(size_t number_of_delayed_adjust_pcs);
 
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995..107d5bb 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -19,15 +19,73 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "memory_region.h"
 #include "thread.h"
 
 namespace art {
 namespace mips64 {
 
+void Mips64Assembler::FinalizeCode() {
+  for (auto& exception_block : exception_blocks_) {
+    EmitExceptionPoll(&exception_block);
+  }
+  PromoteBranches();
+}
+
+void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) {
+  EmitBranches();
+  Assembler::FinalizeInstructions(region);
+  PatchCFI();
+}
+
+void Mips64Assembler::PatchCFI() {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    return;
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Refill our data buffer with patched opcodes.
+  cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16);
+  size_t stream_pos = 0;
+  for (const DelayedAdvancePC& advance : advances) {
+    DCHECK_GE(advance.stream_pos, stream_pos);
+    // Copy old data up to the point where advance was issued.
+    cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos);
+    stream_pos = advance.stream_pos;
+    // Insert the advance command with its final offset.
+    size_t final_pc = GetAdjustedPosition(advance.pc);
+    cfi().AdvancePC(final_pc);
+  }
+  // Copy the final segment if any.
+  cfi().AppendRawData(old_stream, stream_pos, old_stream.size());
+}
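
The patching pass above replays the raw CFI bytes and re-emits each delayed advance at its
final, post-relocation PC. A minimal standalone model of that splice, with plain vectors
standing in for the cfi() stream API and DW_CFA_advance_loc1 as the example opcode:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Stand-in for DelayedAdvancePC: a byte position in the old stream paired
    // with the (pre-relocation) code PC recorded there.
    struct DelayedAdvance { size_t stream_pos; uint32_t pc; };

    // Copy old bytes up to each delayed advance, then append an advance opcode
    // carrying the final PC at that point.
    std::vector<uint8_t> Splice(const std::vector<uint8_t>& old_stream,
                                const std::vector<DelayedAdvance>& advances,
                                uint32_t (*adjusted)(uint32_t)) {
      std::vector<uint8_t> out;
      size_t pos = 0;
      for (const DelayedAdvance& a : advances) {
        assert(a.stream_pos >= pos);
        out.insert(out.end(), old_stream.begin() + pos, old_stream.begin() + a.stream_pos);
        pos = a.stream_pos;
        out.push_back(0x02);  // DW_CFA_advance_loc1
        out.push_back(static_cast<uint8_t>(adjusted(a.pc)));
      }
      out.insert(out.end(), old_stream.begin() + pos, old_stream.end());
      return out;
    }

    int main() {
      std::vector<uint8_t> old_stream = {0xAA, 0xBB, 0xCC};
      std::vector<DelayedAdvance> advances = {{1, 8}, {3, 16}};
      auto adjusted = [](uint32_t pc) { return pc + 4; };  // pretend code grew by 4
      std::vector<uint8_t> patched = Splice(old_stream, advances, adjusted);
      assert((patched == std::vector<uint8_t>{0xAA, 0x02, 12, 0xBB, 0xCC, 0x02, 20}));
      return 0;
    }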
+
+void Mips64Assembler::EmitBranches() {
+  CHECK(!overwriting_);
+  // Switch from appending instructions at the end of the buffer to overwriting
+  // existing instructions (branch placeholders) in the buffer.
+  overwriting_ = true;
+  for (auto& branch : branches_) {
+    EmitBranch(&branch);
+  }
+  overwriting_ = false;
+}
+
 void Mips64Assembler::Emit(uint32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<uint32_t>(value);
+  if (overwriting_) {
+    // Branches to labels are emitted into their placeholders here.
+    buffer_.Store<uint32_t>(overwrite_location_, value);
+    overwrite_location_ += sizeof(uint32_t);
+  } else {
+    // Other instructions are simply appended at the end here.
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    buffer_.Emit<uint32_t>(value);
+  }
 }
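
A toy model of the two emission modes, assuming nothing about AssemblerBuffer beyond what
this hunk shows:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Words are normally appended; while EmitBranches() runs, Emit() instead
    // overwrites the placeholder words reserved for each branch.
    class ToyBuffer {
     public:
      void Emit(uint32_t value) {
        if (overwriting_) {
          std::memcpy(data_.data() + overwrite_location_, &value, sizeof(value));
          overwrite_location_ += sizeof(value);
        } else {
          const uint8_t* p = reinterpret_cast<const uint8_t*>(&value);
          data_.insert(data_.end(), p, p + sizeof(value));
        }
      }
      void BeginOverwrite(size_t loc) { overwriting_ = true; overwrite_location_ = loc; }
      void EndOverwrite() { overwriting_ = false; }
      uint32_t Load(size_t loc) const {
        uint32_t v;
        std::memcpy(&v, data_.data() + loc, sizeof(v));
        return v;
      }

     private:
      std::vector<uint8_t> data_;
      bool overwriting_ = false;
      size_t overwrite_location_ = 0;
    };

    int main() {
      ToyBuffer buf;
      buf.Emit(0);           // placeholder (a nop in the real assembler)
      buf.Emit(0x12345678);  // ordinary instruction
      buf.BeginOverwrite(0);
      buf.Emit(0xDEADBEEF);  // patches the placeholder at offset 0
      buf.EndOverwrite();
      assert(buf.Load(0) == 0xDEADBEEF && buf.Load(4) == 0x12345678);
      return 0;
    }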
 
 void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd,
@@ -82,15 +140,16 @@
 
 void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) {
   CHECK_NE(rs, kNoGpuRegister);
+  CHECK(IsUint<21>(imm21)) << imm21;
   uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
                       static_cast<uint32_t>(rs) << kRsShift |
-                      (imm21 & 0x1FFFFF);
+                      imm21;
   Emit(encoding);
 }
 
-void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) {
-  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift |
-                      (addr26 & 0x3FFFFFF);
+void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) {
+  CHECK(IsUint<26>(imm26)) << imm26;
+  uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26;
   Emit(encoding);
 }
 
@@ -117,14 +176,6 @@
   Emit(encoding);
 }
 
-void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
-void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
@@ -141,10 +192,6 @@
   EmitI(0x19, rs, rt, imm16);
 }
 
-void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
@@ -153,50 +200,14 @@
   EmitR(0, rs, rt, rd, 0, 0x2f);
 }
 
-void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
-}
-
-void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
-}
-
-void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0x1c, rs, rt, rd, 0, 2);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mfhi(rd);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mfhi(rd);
-}
-
 void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void Mips64Assembler::MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1a);
 }
@@ -217,6 +228,10 @@
   EmitR(0, rs, rt, rd, 2, 0x1c);
 }
 
+void Mips64Assembler::Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1c);
+}
+
 void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1e);
 }
@@ -440,14 +455,6 @@
            static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
 }
 
-void Mips64Assembler::Mfhi(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
-}
-
-void Mips64Assembler::Mflo(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
-}
-
 void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x28, rs, rt, imm16);
 }
@@ -480,26 +487,6 @@
   EmitI(0xb, rs, rt, imm16);
 }
 
-void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x4, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
-  EmitI(0x5, rs, rt, imm16);
-  Nop();
-}
-
-void Mips64Assembler::J(uint32_t addr26) {
-  EmitJ(0x2, addr26);
-  Nop();
-}
-
-void Mips64Assembler::Jal(uint32_t addr26) {
-  EmitJ(0x3, addr26);
-  Nop();
-}
-
 void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x35);
 }
@@ -526,7 +513,6 @@
 
 void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) {
   EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09);
-  Nop();
 }
 
 void Mips64Assembler::Jalr(GpuRegister rs) {
@@ -541,6 +527,15 @@
   EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16);
 }
 
+void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) {
+  CHECK(IsUint<19>(imm19)) << imm19;
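+  // Note: addiupc shares the PCREL opcode (0x3B). Its 19-bit immediate fills the low bits
+  // of the 21-bit field, so the top two bits (addiupc's function code) stay zero.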
+  EmitI21(0x3B, rs, imm19);
+}
+
+void Mips64Assembler::Bc(uint32_t imm26) {
+  EmitI26(0x32, imm26);
+}
+
 void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
   EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -601,14 +596,14 @@
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
   CHECK_NE(rs, ZERO);
   CHECK_NE(rt, ZERO);
   CHECK_NE(rs, rt);
-  EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16);
+  EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16);
 }
 
 void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) {
@@ -621,6 +616,65 @@
   EmitI21(0x3E, rs, imm21);
 }
 
+void Mips64Assembler::EmitBcondc(BranchCondition cond,
+                                 GpuRegister rs,
+                                 GpuRegister rt,
+                                 uint32_t imm16_21) {
+  switch (cond) {
+    case kCondLT:
+      Bltc(rs, rt, imm16_21);
+      break;
+    case kCondGE:
+      Bgec(rs, rt, imm16_21);
+      break;
+    case kCondLE:
+      Bgec(rt, rs, imm16_21);
+      break;
+    case kCondGT:
+      Bltc(rt, rs, imm16_21);
+      break;
+    case kCondLTZ:
+      CHECK_EQ(rt, ZERO);
+      Bltzc(rs, imm16_21);
+      break;
+    case kCondGEZ:
+      CHECK_EQ(rt, ZERO);
+      Bgezc(rs, imm16_21);
+      break;
+    case kCondLEZ:
+      CHECK_EQ(rt, ZERO);
+      Blezc(rs, imm16_21);
+      break;
+    case kCondGTZ:
+      CHECK_EQ(rt, ZERO);
+      Bgtzc(rs, imm16_21);
+      break;
+    case kCondEQ:
+      Beqc(rs, rt, imm16_21);
+      break;
+    case kCondNE:
+      Bnec(rs, rt, imm16_21);
+      break;
+    case kCondEQZ:
+      CHECK_EQ(rt, ZERO);
+      Beqzc(rs, imm16_21);
+      break;
+    case kCondNEZ:
+      CHECK_EQ(rt, ZERO);
+      Bnezc(rs, imm16_21);
+      break;
+    case kCondLTU:
+      Bltuc(rs, rt, imm16_21);
+      break;
+    case kCondGEU:
+      Bgeuc(rs, rt, imm16_21);
+      break;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+      UNREACHABLE();
+  }
+}
+
 void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
   EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
 }
@@ -892,45 +946,58 @@
   } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
     Lui(rd, value >> 16);
     Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      Dsrl(rd, rd, shift_cnt);
+    } else {
+      Dsrl32(rd, rd, shift_cnt & 31);
+    }
   } else {
     int shift_cnt = CTZ(value);
     int64_t tmp = value >> shift_cnt;
     if (IsUint<16>(tmp)) {
       Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<16>(tmp)) {
       Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<32>(tmp)) {
       // Loads with 3 instructions.
       Lui(rd, tmp >> 16);
       Ori(rd, rd, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else {
       shift_cnt = 16 + CTZ(value >> 16);
       tmp = value >> shift_cnt;
       if (IsUint<16>(tmp)) {
         Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else if (IsInt<16>(tmp)) {
         Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else {
         // Loads with 3-4 instructions.
@@ -941,10 +1008,11 @@
           used_lui = true;
         }
         if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui)
+          if (used_lui) {
             Ori(rd, rd, tmp2);
-          else
+          } else {
             Ori(rd, ZERO, tmp2);
+          }
         }
         if (bit31) {
           tmp2 += UINT64_C(0x100000000);
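
The new IsPowerOfTwo(value + 1) fast path above deserves a worked check: a constant with
only its low n bits set is materialized in two instructions. A standalone sketch, with the
GCC/Clang builtin __builtin_ctzll standing in for ART's CTZ:

    #include <cassert>
    #include <cstdint>

    // 2^n - 1 loads as: daddiu rd, zero, -1 (all ones), then dsrl/dsrl32 by 64-n.
    int main() {
      for (uint64_t value : {0xFFull, 0xFFFFFFFFull, 0x00FFFFFFFFFFFFFFull}) {
        int shift_cnt = 64 - __builtin_ctzll(value + 1);
        uint64_t rd = static_cast<uint64_t>(-1);  // daddiu rd, zero, -1
        rd >>= shift_cnt;                         // dsrl (or dsrl32) rd, rd, shift_cnt
        assert(rd == value);
      }
      return 0;
    }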
@@ -963,15 +1031,6 @@
   }
 }
 
-void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) {
-  if (IsInt<16>(value)) {
-    Addiu(rt, rs, value);
-  } else {
-    LoadConst32(rtmp, value);
-    Addu(rt, rs, rtmp);
-  }
-}
-
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
@@ -981,177 +1040,621 @@
   }
 }
 
-//
-// MIPS64R6 branches
-//
-//
-// Unconditional (pc + 32-bit signed offset):
-//
-//   auipc    at, ofs_high
-//   jic      at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Conditional (pc + 32-bit signed offset):
-//
-//   b<cond>c   reg, +2      // skip next 2 instructions
-//   auipc      at, ofs_high
-//   jic        at, ofs_low
-//   // no delay/forbidden slot
-//
-//
-// Unconditional (pc + 32-bit signed offset) and link:
-//
-//   auipc    reg, ofs_high
-//   daddiu   reg, ofs_low
-//   jialc    reg, 0
-//   // no delay/forbidden slot
-//
-//
-// TODO: use shorter instruction sequences whenever possible.
-//
+void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size,
+                                              Mips64Assembler::Branch::Type short_type,
+                                              Mips64Assembler::Branch::Type long_type) {
+  type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
+}
 
-void Mips64Assembler::Bind(Label* label) {
+void Mips64Assembler::Branch::InitializeType(bool is_call) {
+  OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+  if (is_call) {
+    InitShortOrLong(offset_size, kCall, kLongCall);
+  } else if (condition_ == kUncond) {
+    InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+  } else {
+    if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
+      // Special case for beqzc/bnezc, which take a longer offset than other b<cond>c instructions.
+      type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
+    } else {
+      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+    }
+  }
+  old_type_ = type_;
+}
+
+bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) {
+  switch (condition) {
+    case kCondLT:
+    case kCondGT:
+    case kCondNE:
+    case kCondLTU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+bool Mips64Assembler::Branch::IsUncond(BranchCondition condition,
+                                       GpuRegister lhs,
+                                       GpuRegister rhs) {
+  switch (condition) {
+    case kUncond:
+      return true;
+    case kCondGE:
+    case kCondLE:
+    case kCondEQ:
+    case kCondGEU:
+      return lhs == rhs;
+    default:
+      return false;
+  }
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(ZERO),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location,
+                                uint32_t target,
+                                Mips64Assembler::BranchCondition condition,
+                                GpuRegister lhs_reg,
+                                GpuRegister rhs_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(lhs_reg),
+      rhs_reg_(rhs_reg),
+      condition_(condition) {
+  CHECK_NE(condition, kUncond);
+  switch (condition) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondLT:
+    case kCondGE:
+    case kCondLE:
+    case kCondGT:
+    case kCondLTU:
+    case kCondGEU:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_NE(rhs_reg, ZERO);
+      break;
+    case kCondLTZ:
+    case kCondGEZ:
+    case kCondLEZ:
+    case kCondGTZ:
+    case kCondEQZ:
+    case kCondNEZ:
+      CHECK_NE(lhs_reg, ZERO);
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
+    case kUncond:
+      UNREACHABLE();
+  }
+  CHECK(!IsNop(condition, lhs_reg, rhs_reg));
+  if (IsUncond(condition, lhs_reg, rhs_reg)) {
+    // Branch condition is always true, make the branch unconditional.
+    condition_ = kUncond;
+  }
+  InitializeType(false);
+}
+
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg)
+    : old_location_(location),
+      location_(location),
+      target_(target),
+      lhs_reg_(indirect_reg),
+      rhs_reg_(ZERO),
+      condition_(kUncond) {
+  CHECK_NE(indirect_reg, ZERO);
+  CHECK_NE(indirect_reg, AT);
+  InitializeType(true);
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
+    Mips64Assembler::BranchCondition cond) {
+  switch (cond) {
+    case kCondLT:
+      return kCondGE;
+    case kCondGE:
+      return kCondLT;
+    case kCondLE:
+      return kCondGT;
+    case kCondGT:
+      return kCondLE;
+    case kCondLTZ:
+      return kCondGEZ;
+    case kCondGEZ:
+      return kCondLTZ;
+    case kCondLEZ:
+      return kCondGTZ;
+    case kCondGTZ:
+      return kCondLEZ;
+    case kCondEQ:
+      return kCondNE;
+    case kCondNE:
+      return kCondEQ;
+    case kCondEQZ:
+      return kCondNEZ;
+    case kCondNEZ:
+      return kCondEQZ;
+    case kCondLTU:
+      return kCondGEU;
+    case kCondGEU:
+      return kCondLTU;
+    case kUncond:
+      LOG(FATAL) << "Unexpected branch condition " << cond;
+  }
+  UNREACHABLE();
+}
+
+Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const {
+  return type_;
+}
+
+Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const {
+  return condition_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetLeftRegister() const {
+  return lhs_reg_;
+}
+
+GpuRegister Mips64Assembler::Branch::GetRightRegister() const {
+  return rhs_reg_;
+}
+
+uint32_t Mips64Assembler::Branch::GetTarget() const {
+  return target_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLocation() const {
+  return location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLocation() const {
+  return old_location_;
+}
+
+uint32_t Mips64Assembler::Branch::GetLength() const {
+  return branch_info_[type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetOldLength() const {
+  return branch_info_[old_type_].length;
+}
+
+uint32_t Mips64Assembler::Branch::GetSize() const {
+  return GetLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOldSize() const {
+  return GetOldLength() * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetEndLocation() const {
+  return GetLocation() + GetSize();
+}
+
+uint32_t Mips64Assembler::Branch::GetOldEndLocation() const {
+  return GetOldLocation() + GetOldSize();
+}
+
+bool Mips64Assembler::Branch::IsLong() const {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+    case kCondBranch:
+    case kCall:
+      return false;
+    // Long branches.
+    case kLongUncondBranch:
+    case kLongCondBranch:
+    case kLongCall:
+      return true;
+  }
+  UNREACHABLE();
+}
+
+bool Mips64Assembler::Branch::IsResolved() const {
+  return target_ != kUnresolved;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {
+  OffsetBits offset_size =
+      (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+          ? kOffset23
+          : branch_info_[type_].offset_size;
+  return offset_size;
+}
+
+Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location,
+                                                                                 uint32_t target) {
+  // For unresolved targets, assume the shortest encoding
+  // (it will be made longer later if needed).
+  if (target == kUnresolved) {
+    return kOffset16;
+  }
+  int64_t distance = static_cast<int64_t>(target) - location;
+  // To simplify calculations in composite branches consisting of multiple instructions,
+  // bump up the distance by a value larger than the max byte size of a composite branch.
+  distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
+  if (IsInt<kOffset16>(distance)) {
+    return kOffset16;
+  } else if (IsInt<kOffset18>(distance)) {
+    return kOffset18;
+  } else if (IsInt<kOffset21>(distance)) {
+    return kOffset21;
+  } else if (IsInt<kOffset23>(distance)) {
+    return kOffset23;
+  } else if (IsInt<kOffset28>(distance)) {
+    return kOffset28;
+  }
+  return kOffset32;
+}
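
A sketch of the bucket selection, with a hypothetical 16-byte stand-in for kMaxBranchSize
(the real constant lives on the Branch class):

    #include <cassert>
    #include <cstdint>

    constexpr int64_t kMaxBranchSizeBytes = 16;  // assumed worst-case composite branch

    template <int kBits>
    constexpr bool FitsSigned(int64_t v) {
      return v >= -(INT64_C(1) << (kBits - 1)) && v < (INT64_C(1) << (kBits - 1));
    }

    // Smallest signed-offset width that holds the padded distance.
    int OffsetBitsNeeded(uint32_t location, uint32_t target) {
      int64_t d = static_cast<int64_t>(target) - location;
      d += (d >= 0) ? kMaxBranchSizeBytes : -kMaxBranchSizeBytes;
      if (FitsSigned<16>(d)) return 16;
      if (FitsSigned<18>(d)) return 18;
      if (FitsSigned<21>(d)) return 21;
      if (FitsSigned<23>(d)) return 23;
      if (FitsSigned<28>(d)) return 28;
      return 32;
    }

    int main() {
      assert(OffsetBitsNeeded(0, 0x4000) == 16);     // 16 KiB forward
      assert(OffsetBitsNeeded(0, 0x10000) == 18);    // 64 KiB forward
      assert(OffsetBitsNeeded(0, 0x8000000) == 32);  // 128 MiB: needs a long branch
      return 0;
    }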
+
+void Mips64Assembler::Branch::Resolve(uint32_t target) {
+  target_ = target;
+}
+
+void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
+  if (location_ > expand_location) {
+    location_ += delta;
+  }
+  if (!IsResolved()) {
+    return;  // Don't know the target yet.
+  }
+  if (target_ > expand_location) {
+    target_ += delta;
+  }
+}
+
+void Mips64Assembler::Branch::PromoteToLong() {
+  switch (type_) {
+    // Short branches.
+    case kUncondBranch:
+      type_ = kLongUncondBranch;
+      break;
+    case kCondBranch:
+      type_ = kLongCondBranch;
+      break;
+    case kCall:
+      type_ = kLongCall;
+      break;
+    default:
+      // Note: 'type_' is already long.
+      break;
+  }
+  CHECK(IsLong());
+}
+
+uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
+  // If the branch is still unresolved or already long, nothing to do.
+  if (IsLong() || !IsResolved()) {
+    return 0;
+  }
+  // Promote the short branch to long if the offset size is too small
+  // to hold the distance between location_ and target_.
+  if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) {
+    PromoteToLong();
+    uint32_t old_size = GetOldSize();
+    uint32_t new_size = GetSize();
+    CHECK_GT(new_size, old_size);
+    return new_size - old_size;
+  }
+  // The following logic is for debugging/testing purposes.
+  // Promote some short branches to long when it's not really required.
+  if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+    int64_t distance = static_cast<int64_t>(target_) - location_;
+    distance = (distance >= 0) ? distance : -distance;
+    if (distance >= max_short_distance) {
+      PromoteToLong();
+      uint32_t old_size = GetOldSize();
+      uint32_t new_size = GetSize();
+      CHECK_GT(new_size, old_size);
+      return new_size - old_size;
+    }
+  }
+  return 0;
+}
+
+uint32_t Mips64Assembler::Branch::GetOffsetLocation() const {
+  return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t);
+}
+
+uint32_t Mips64Assembler::Branch::GetOffset() const {
+  CHECK(IsResolved());
+  uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
+  // Calculate the byte distance between instructions and also account for
+  // different PC-relative origins.
+  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  // Prepare the offset for encoding into the instruction(s).
+  offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
+  return offset;
+}
+
+Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const {
+  CHECK_LT(branch_id, branches_.size());
+  return &branches_[branch_id];
+}
+
+void Mips64Assembler::Bind(Mips64Label* label) {
   CHECK(!label->IsBound());
-  int32_t bound_pc = buffer_.Size();
+  uint32_t bound_pc = buffer_.Size();
 
-  // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label.
-  // Embed the previously unknown pc-relative addresses in them.
+  // Walk the list of branches referring to and preceding this label.
+  // Store the previously unknown target addresses in them.
   while (label->IsLinked()) {
-    int32_t position = label->Position();
-    // Extract the branch (instruction pair)
-    uint32_t auipc = buffer_.Load<uint32_t>(position);
-    uint32_t jic = buffer_.Load<uint32_t>(position + 4);  // actually, jic or daddiu
+    uint32_t branch_id = label->Position();
+    Branch* branch = GetBranch(branch_id);
+    branch->Resolve(bound_pc);
 
-    // Extract the location of the previous pair in the list (walking the list backwards;
-    // the previous pair location was stored in the immediate operands of the instructions)
-    int32_t prev = (auipc << 16) | (jic & 0xFFFF);
-
-    // Get the pc-relative address
-    uint32_t offset = bound_pc - position;
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic/daddiu
-
-    // Embed it in the two instructions
-    auipc = (auipc & 0xFFFF0000) | (offset >> 16);
-    jic = (jic & 0xFFFF0000) | (offset & 0xFFFF);
-
-    // Save the adjusted instructions
-    buffer_.Store<uint32_t>(position, auipc);
-    buffer_.Store<uint32_t>(position + 4, jic);
+    uint32_t branch_location = branch->GetLocation();
+    // Extract the location of the previous branch in the list (walking the list backwards;
+    // the previous branch ID was stored in the space reserved for this branch).
+    uint32_t prev = buffer_.Load<uint32_t>(branch_location);
 
     // On to the previous branch in the list...
     label->position_ = prev;
   }
 
-  // Now make the label object contain its own location
-  // (it will be used by the branches referring to and following this label)
+  // Now make the label object contain its own location (relative to the end of the preceding
+  // branch, if any; it will be used by the branches referring to and following this label).
+  label->prev_branch_id_plus_one_ = branches_.size();
+  if (label->prev_branch_id_plus_one_) {
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    bound_pc -= branch->GetEndLocation();
+  }
   label->BindTo(bound_pc);
 }
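
The list walked here is threaded through the code buffer itself: the placeholder word of
each unresolved forward branch stores the previous list entry, and the label stores the
newest one. A minimal model of the scheme (branch IDs biased by one so that 0 terminates
the list; the real code hides an equivalent encoding inside Label):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct ToyBranch { uint32_t location; uint32_t target = UINT32_MAX; };
    struct ToyLabel { uint32_t head_plus_one = 0; };  // 0: no forward branches yet

    std::vector<uint32_t> code;  // one 32-bit word per instruction slot
    std::vector<ToyBranch> branches;

    void BranchTo(ToyLabel* label) {
      uint32_t id = static_cast<uint32_t>(branches.size());
      branches.push_back({static_cast<uint32_t>(code.size())});
      code.push_back(label->head_plus_one);  // thread the old head through the buffer
      label->head_plus_one = id + 1;
    }

    void Bind(ToyLabel* label, uint32_t bound_pc) {
      while (label->head_plus_one != 0) {
        ToyBranch& b = branches[label->head_plus_one - 1];
        b.target = bound_pc;
        label->head_plus_one = code[b.location];  // previous entry in the list
      }
    }

    int main() {
      ToyLabel label;
      BranchTo(&label);   // forward branch #0
      code.push_back(0);  // some unrelated instruction
      BranchTo(&label);   // forward branch #1
      Bind(&label, static_cast<uint32_t>(code.size()));
      assert(branches[0].target == 3 && branches[1].target == 3);
      return 0;
    }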
 
-void Mips64Assembler::B(Label* label) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in jic
-    Auipc(AT, offset >> 16);
-    Jic(AT, offset);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(AT, prev >> 16);
-    Jic(AT, prev);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const {
+  CHECK(label->IsBound());
+  uint32_t target = label->Position();
+  if (label->prev_branch_id_plus_one_) {
+    // Get label location based on the branch preceding it.
+    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
+    const Branch* branch = GetBranch(branch_id);
+    target += branch->GetEndLocation();
+  }
+  return target;
+}
+
+uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) {
+  // We can reconstruct the adjustment by going through all the branches from the beginning
+  // up to the old_position. Since we expect GetAdjustedPosition() to be called in a loop
+  // with increasing old_position, we can use the data from the last call to continue where
+  // we left off, making the whole loop O(m+n), where m is the number of positions to adjust
+  // and n is the number of branches.
+  if (old_position < last_old_position_) {
+    last_position_adjustment_ = 0;
+    last_old_position_ = 0;
+    last_branch_id_ = 0;
+  }
+  while (last_branch_id_ != branches_.size()) {
+    const Branch* branch = GetBranch(last_branch_id_);
+    if (branch->GetLocation() >= old_position + last_position_adjustment_) {
+      break;
+    }
+    last_position_adjustment_ += branch->GetSize() - branch->GetOldSize();
+    ++last_branch_id_;
+  }
+  last_old_position_ = old_position;
+  return old_position + last_position_adjustment_;
+}
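
Since the cost argument above is easy to get wrong, a self-contained model of the
cursor-carrying walk (Expansion records where code grew and by how much):

    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    struct Expansion { uint32_t location; uint32_t delta; };

    class PositionAdjuster {
     public:
      explicit PositionAdjuster(std::vector<Expansion> expansions)
          : expansions_(std::move(expansions)) {}

      // Call with non-decreasing old_position; the cursor and the accumulated
      // adjustment carry over between calls, so n queries over m expansions
      // cost O(m + n) total.
      uint32_t Adjust(uint32_t old_position) {
        while (index_ != expansions_.size() &&
               expansions_[index_].location < old_position + adjustment_) {
          adjustment_ += expansions_[index_].delta;
          ++index_;
        }
        return old_position + adjustment_;
      }

     private:
      std::vector<Expansion> expansions_;
      size_t index_ = 0;
      uint32_t adjustment_ = 0;
    };

    int main() {
      PositionAdjuster adj({{8, 4}, {20, 8}});  // branches at 8 and 20 grew by 4 and 8
      assert(adj.Adjust(4) == 4);
      assert(adj.Adjust(12) == 16);  // past the first expansion
      assert(adj.Adjust(24) == 36);  // past both
      return 0;
    }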
+
+void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) {
+  uint32_t length = branches_.back().GetLength();
+  if (!label->IsBound()) {
+    // Branch forward (to a following label), distance is unknown.
+    // The first branch forward will contain 0, serving as the terminator of
+    // the list of forward-reaching branches.
+    Emit(label->position_);
+    length--;
+    // Now make the label object point to this branch
+    // (this forms a linked list of branches preceding this label).
+    uint32_t branch_id = branches_.size() - 1;
+    label->LinkTo(branch_id);
+  }
+  // Reserve space for the branch.
+  while (length--) {
+    Nop();
   }
 }
 
-void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) {
-  if (label->IsBound()) {
-    // Branch backwards (to a preceding label), distance is known
-    uint32_t offset = label->Position() - buffer_.Size();
-    CHECK_LE(static_cast<int32_t>(offset), 0);
-    offset += (offset & 0x8000) << 1;  // account for sign extension in daddiu
-    Auipc(indirect_reg, offset >> 16);
-    Daddiu(indirect_reg, indirect_reg, offset);
-    Jialc(indirect_reg, 0);
-  } else {
-    // Branch forward (to a following label), distance is unknown
-    int32_t position = buffer_.Size();
-    // The first branch forward will have 0 in its pc-relative address (copied from label's
-    // position). It will be the terminator of the list of forward-reaching branches.
-    uint32_t prev = label->position_;
-    Auipc(indirect_reg, prev >> 16);
-    Daddiu(indirect_reg, indirect_reg, prev);
-    Jialc(indirect_reg, 0);
-    // Now make the link object point to the location of this branch
-    // (this forms a linked list of branches preceding this label)
-    label->LinkTo(position);
+void Mips64Assembler::Buncond(Mips64Label* label) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Bcond(Mips64Label* label,
+                            BranchCondition condition,
+                            GpuRegister lhs,
+                            GpuRegister rhs) {
+  // If lhs = rhs, this can be a NOP.
+  if (Branch::IsNop(condition, lhs, rhs)) {
+    return;
+  }
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) {
+  uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
+  branches_.emplace_back(buffer_.Size(), target, indirect_reg);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::PromoteBranches() {
+  // Promote short branches to long as necessary.
+  bool changed;
+  do {
+    changed = false;
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+      uint32_t delta = branch.PromoteIfNeeded();
+      // If this branch has been promoted and needs to expand in size,
+      // relocate all branches by the expansion size.
+      if (delta) {
+        changed = true;
+        uint32_t expand_location = branch.GetLocation();
+        for (auto& branch2 : branches_) {
+          branch2.Relocate(expand_location, delta);
+        }
+      }
+    }
+  } while (changed);
+
+  // Account for branch expansion by resizing the code buffer
+  // and moving the code in it to its final location.
+  size_t branch_count = branches_.size();
+  if (branch_count > 0) {
+    // Resize.
+    Branch& last_branch = branches_[branch_count - 1];
+    uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation();
+    uint32_t old_size = buffer_.Size();
+    buffer_.Resize(old_size + size_delta);
+    // Move the code residing between branch placeholders.
+    uint32_t end = old_size;
+    for (size_t i = branch_count; i > 0; ) {
+      Branch& branch = branches_[--i];
+      uint32_t size = end - branch.GetOldEndLocation();
+      buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size);
+      end = branch.GetOldLocation();
+    }
   }
 }
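
Promotion can cascade: growing one branch moves later code, which can push a branch that
spans it out of range too, hence the fixed point. Each branch grows at most once (short to
long), so the loop terminates. A toy model with assumed sizes (short branches grow by 8
bytes when promoted, 128-byte reach):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct B { uint32_t location; uint32_t target; bool is_long = false; };
    constexpr int64_t kShortRange = 128;  // hypothetical short-branch reach, in bytes

    void Promote(std::vector<B>& branches) {
      bool changed;
      do {
        changed = false;
        for (B& b : branches) {
          int64_t distance = static_cast<int64_t>(b.target) - b.location;
          if (!b.is_long && (distance > kShortRange || distance < -kShortRange)) {
            b.is_long = true;
            changed = true;
            const uint32_t delta = 8;    // long form is two words bigger here
            for (B& other : branches) {  // relocate everything past the growth point
              if (other.location > b.location) other.location += delta;
              if (other.target > b.location) other.target += delta;
            }
          }
        }
      } while (changed);
    }

    int main() {
      // The second branch spans the first: promoting the first moves the
      // second's target 8 bytes further away, across the short-range limit.
      std::vector<B> branches = {{4, 140}, {0, 126}};
      Promote(branches);
      assert(branches[0].is_long && branches[1].is_long);
      return 0;
    }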
 
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgec(rs, rt, 2);
-  B(label);
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
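+// BranchInfo columns, as consumed by GetLength(), GetOffsetLocation() and GetOffset() above:
+// branch length in 4-byte words, index of the word holding the offset, PC-origin distance
+// in words, offset width in bits, and the shift applied to the offset before encoding.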
+const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
+  // Short branches.
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kUncondBranch
+  {  2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kCondBranch
+                                                        // Exception: kOffset23 for beqzc/bnezc
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kCall
+  // Long branches.
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongUncondBranch
+  {  3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCondBranch
+  {  3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCall
+};
+
+// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
+void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
+  CHECK(overwriting_);
+  overwrite_location_ = branch->GetLocation();
+  uint32_t offset = branch->GetOffset();
+  BranchCondition condition = branch->GetCondition();
+  GpuRegister lhs = branch->GetLeftRegister();
+  GpuRegister rhs = branch->GetRightRegister();
+  switch (branch->GetType()) {
+    // Short branches.
+    case Branch::kUncondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Bc(offset);
+      break;
+    case Branch::kCondBranch:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      EmitBcondc(condition, lhs, rhs, offset);
+      Nop();  // TODO: improve by filling the forbidden slot.
+      break;
+    case Branch::kCall:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Addiupc(lhs, offset);
+      Jialc(lhs, 0);
+      break;
+
+    // Long branches.
+    case Branch::kLongUncondBranch:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCondBranch:
+      EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jic.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Jic(AT, Low16Bits(offset));
+      break;
+    case Branch::kLongCall:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in daddiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(lhs, High16Bits(offset));
+      Daddiu(lhs, lhs, Low16Bits(offset));
+      Jialc(lhs, 0);
+      break;
+  }
+  CHECK_EQ(overwrite_location_, branch->GetEndLocation());
+  CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
 }
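
The 'offset += (offset & 0x8000) << 1' compensation above pre-increments the high half
whenever bit 15 of the low half is set, cancelling the sign extension that jic/daddiu will
apply to their 16-bit immediates. A standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t offset : {0x00001234u, 0x0000ABCDu, 0x7FFF8000u, 0xFFFF8001u}) {
        uint32_t adjusted = offset + ((offset & 0x8000) << 1);
        uint16_t high = static_cast<uint16_t>(adjusted >> 16);    // auipc immediate
        uint16_t low = static_cast<uint16_t>(adjusted & 0xFFFF);  // jic/daddiu immediate
        // What the hardware computes: (high << 16) + sign_extend(low), mod 2^32.
        uint32_t consumed =
            (static_cast<uint32_t>(high) << 16) +
            static_cast<uint32_t>(static_cast<int32_t>(static_cast<int16_t>(low)));
        assert(consumed == offset);
      }
      return 0;
    }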
 
-void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) {
-  Bgezc(rt, 2);
-  B(label);
+void Mips64Assembler::Bc(Mips64Label* label) {
+  Buncond(label);
 }
 
-void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) {
-  Blezc(rt, 2);
-  B(label);
+void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) {
+  Call(label, indirect_reg);
 }
 
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLT, rs, rt);
 }
 
-void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) {
-  Bltzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTZ, rt);
 }
 
-void Mips64Assembler::Blezc(GpuRegister rt, Label* label) {
-  Bgtzc(rt, 2);
-  B(label);
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGTZ, rt);
 }
 
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bgeuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGE, rs, rt);
 }
 
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bltuc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEZ, rt);
 }
 
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) {
-  Bnec(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLEZ, rt);
 }
 
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) {
-  Beqc(rs, rt, 2);
-  B(label);
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondLTU, rs, rt);
 }
 
-void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) {
-  Bnezc(rs, 2);
-  B(label);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondGEU, rs, rt);
 }
 
-void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) {
-  Beqzc(rs, 2);
-  B(label);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondEQ, rs, rt);
+}
+
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
+  Bcond(label, kCondNE, rs, rt);
+}
+
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondEQZ, rs);
+}
+
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
+  Bcond(label, kCondNEZ, rs);
 }
 
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
@@ -1294,6 +1797,7 @@
                                  const std::vector<ManagedRegister>& callee_save_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1336,6 +1840,7 @@
 void Mips64Assembler::RemoveFrame(size_t frame_size,
                                   const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address
@@ -1354,6 +1859,7 @@
 
   // Then jump to the return address.
   Jr(RA);
+  Nop();
 
   // The CFI should be restored for any code that follows the exit block.
   cfi_.RestoreState();
@@ -1362,12 +1868,14 @@
 
 void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(-adjust));
   cfi_.AdjustCFAOffset(adjust);
 }
 
 void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
+  DCHECK(!overwriting_);
   Daddiu64(SP, SP, static_cast<int32_t>(adjust));
   cfi_.AdjustCFAOffset(-adjust);
 }
@@ -1417,17 +1925,7 @@
   StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value());
 }
 
-void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                               ManagedRegister mscratch) {
-  Mips64ManagedRegister scratch = mscratch.AsMips64();
-  CHECK(scratch.IsGpuRegister()) << scratch;
-  // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?).
-  // Is this function even referenced anywhere else in the code?
-  LoadConst32(scratch.AsGpuRegister(), imm);
-  StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value());
-}
-
-void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
                                                  FrameOffset fr_offs,
                                                  ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1436,7 +1934,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value());
 }
 
-void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) {
+void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) {
   StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value());
 }
 
@@ -1453,7 +1951,9 @@
   return EmitLoad(mdest, SP, src.Int32Value(), size);
 }
 
-void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) {
+void Mips64Assembler::LoadFromThread64(ManagedRegister mdest,
+                                       ThreadOffset<kMipsDoublewordSize> src,
+                                       size_t size) {
   return EmitLoad(mdest, S1, src.Int32Value(), size);
 }
 
@@ -1487,18 +1987,20 @@
 }
 
 void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest,
-                                             ThreadOffset<8> offs) {
+                                             ThreadOffset<kMipsDoublewordSize> offs) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister());
   LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value());
 }
 
-void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips";
+void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64";
 }
 
-void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips";
+void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED,
+                                 size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64";
 }
 
 void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
@@ -1530,7 +2032,7 @@
 }
 
 void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs,
-                                             ThreadOffset<8> thr_offs,
+                                             ThreadOffset<kMipsDoublewordSize> thr_offs,
                                              ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
@@ -1538,7 +2040,7 @@
   StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value());
 }
 
-void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs,
+void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs,
                                            FrameOffset fr_offs,
                                            ManagedRegister mscratch) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
@@ -1599,9 +2101,12 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           FrameOffset src_base ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset,
@@ -1622,15 +2127,18 @@
   }
 }
 
-void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset
-/*src_offset*/,
-                         ManagedRegister /*mscratch*/, size_t /*size*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED,
+                           Offset dest_offset ATTRIBUTE_UNUSED,
+                           FrameOffset src ATTRIBUTE_UNUSED,
+                           Offset src_offset ATTRIBUTE_UNUSED,
+                           ManagedRegister mscratch ATTRIBUTE_UNUSED,
+                           size_t size ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
-void Mips64Assembler::MemoryBarrier(ManagedRegister) {
+void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) {
   // TODO: sync?
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
@@ -1642,7 +2150,7 @@
   CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg;
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
@@ -1669,7 +2177,7 @@
   Mips64ManagedRegister scratch = mscratch.AsMips64();
   CHECK(scratch.IsGpuRegister()) << scratch;
   if (null_allowed) {
-    Label null_arg;
+    Mips64Label null_arg;
     LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
                    handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -1691,7 +2199,7 @@
   Mips64ManagedRegister in_reg = min_reg.AsMips64();
   CHECK(out_reg.IsGpuRegister()) << out_reg;
   CHECK(in_reg.IsGpuRegister()) << in_reg;
-  Label null_arg;
+  Mips64Label null_arg;
   if (!out_reg.Equals(in_reg)) {
     LoadConst32(out_reg.AsGpuRegister(), 0);
   }
@@ -1701,11 +2209,13 @@
   Bind(&null_arg);
 }
 
-void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
-void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
+void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED,
+                                   bool could_be_null ATTRIBUTE_UNUSED) {
   // TODO: not validating references
 }
 
@@ -1717,6 +2227,7 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  base.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
@@ -1729,11 +2240,13 @@
   LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
                  scratch.AsGpuRegister(), offset.Int32Value());
   Jalr(scratch.AsGpuRegister());
+  Nop();
   // TODO: place reference map on call
 }
 
-void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) {
-  UNIMPLEMENTED(FATAL) << "no mips64 implementation";
+void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED,
+                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "No MIPS64 implementation";
 }
 
 void Mips64Assembler::GetCurrentThread(ManagedRegister tr) {
@@ -1741,37 +2254,39 @@
 }
 
 void Mips64Assembler::GetCurrentThread(FrameOffset offset,
-                                       ManagedRegister /*mscratch*/) {
+                                       ManagedRegister mscratch ATTRIBUTE_UNUSED) {
   StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value());
 }
 
 void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   Mips64ManagedRegister scratch = mscratch.AsMips64();
-  Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust);
-  buffer_.EnqueueSlowPath(slow);
-  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
-                 S1, Thread::ExceptionOffset<8>().Int32Value());
-  Bnezc(scratch.AsGpuRegister(), slow->Entry());
+  exception_blocks_.emplace_back(scratch, stack_adjust);
+  LoadFromOffset(kLoadDoubleword,
+                 scratch.AsGpuRegister(),
+                 S1,
+                 Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value());
+  Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry());
 }
 
-void Mips64ExceptionSlowPath::Emit(Assembler* sasm) {
-  Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm);
-#define __ sp_asm->
-  __ Bind(&entry_);
-  if (stack_adjust_ != 0) {  // Fix up the frame.
-    __ DecreaseFrameSize(stack_adjust_);
+void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) {
+  Bind(exception->Entry());
+  if (exception->stack_adjust_ != 0) {  // Fix up the frame.
+    DecreaseFrameSize(exception->stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving A0 as this call won't return
-  __ Move(A0, scratch_.AsGpuRegister());
+  // Pass exception object as argument.
+  // Don't care about preserving A0 as this call won't return.
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+  Move(A0, exception->scratch_.AsGpuRegister());
   // Set up call to Thread::Current()->pDeliverException
-  __ LoadFromOffset(kLoadDoubleword, T9, S1,
-                    QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value());
-  // TODO: check T9 usage
-  __ Jr(T9);
+  LoadFromOffset(kLoadDoubleword,
+                 T9,
+                 S1,
+                 QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value());
+  Jr(T9);
+  Nop();
+
   // Call never returns
-  __ Break();
-#undef __
+  Break();
 }
 
 }  // namespace mips64
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 33f22d2..57fc19a 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -17,18 +17,22 @@
 #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
 
+#include <utility>
 #include <vector>
 
 #include "base/macros.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
-#include "utils/assembler.h"
 #include "offsets.h"
+#include "utils/assembler.h"
+#include "utils/label.h"
 
 namespace art {
 namespace mips64 {
 
+static constexpr size_t kMipsDoublewordSize = 8;
+
 enum LoadOperandType {
   kLoadSignedByte,
   kLoadUnsignedByte,
@@ -60,41 +64,78 @@
   kPositiveZero      = 0x200,
 };
 
+class Mips64Label : public Label {
+ public:
+  Mips64Label() : prev_branch_id_plus_one_(0) {}
+
+  Mips64Label(Mips64Label&& src)
+      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+
+ private:
+  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64Label);
+};
+
+// Slowpath entered when Thread::Current()->_exception is non-null.
+class Mips64ExceptionSlowPath {
+ public:
+  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {}
+
+  Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
+      : scratch_(src.scratch_),
+        stack_adjust_(src.stack_adjust_),
+        exception_entry_(std::move(src.exception_entry_)) {}
+
+ private:
+  Mips64Label* Entry() { return &exception_entry_; }
+  const Mips64ManagedRegister scratch_;
+  const size_t stack_adjust_;
+  Mips64Label exception_entry_;
+
+  friend class Mips64Assembler;
+  DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
+};
+
 class Mips64Assembler FINAL : public Assembler {
  public:
-  Mips64Assembler() {}
-  virtual ~Mips64Assembler() {}
+  Mips64Assembler()
+      : overwriting_(false),
+        overwrite_location_(0),
+        last_position_adjustment_(0),
+        last_old_position_(0),
+        last_branch_id_(0) {
+    cfi().DelayEmittingAdvancePCs();
+  }
+
+  virtual ~Mips64Assembler() {
+    for (auto& branch : branches_) {
+      CHECK(branch.IsResolved());
+    }
+  }
 
   // Emit Machine Instructions.
-  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
-  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
-  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -104,12 +145,12 @@
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Bitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Dbitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
-  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dsbh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dshd(GpuRegister rd, GpuRegister rt);  // R2+
+  void Bitswap(GpuRegister rd, GpuRegister rt);
+  void Dbitswap(GpuRegister rd, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);
+  void Seh(GpuRegister rd, GpuRegister rt);
+  void Dsbh(GpuRegister rd, GpuRegister rt);
+  void Dshd(GpuRegister rd, GpuRegister rt);
   void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -146,11 +187,9 @@
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
-  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
-  void Mfhi(GpuRegister rd);  // R2
-  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -168,28 +207,26 @@
   void Dclz(GpuRegister rd, GpuRegister rs);
   void Dclo(GpuRegister rd, GpuRegister rs);
 
-  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);
-  void J(uint32_t addr26);
-  void Jal(uint32_t addr26);
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
-  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
-  void Jic(GpuRegister rt, uint16_t imm16);  // R6
-  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
-  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
+  void Auipc(GpuRegister rs, uint16_t imm16);
+  void Addiupc(GpuRegister rs, uint32_t imm19);
+  void Bc(uint32_t imm26);
+  void Jic(GpuRegister rt, uint16_t imm16);
+  void Jialc(GpuRegister rt, uint16_t imm16);
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bltzc(GpuRegister rt, uint16_t imm16);
+  void Bgtzc(GpuRegister rt, uint16_t imm16);
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgezc(GpuRegister rt, uint16_t imm16);
+  void Blezc(GpuRegister rt, uint16_t imm16);
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqzc(GpuRegister rs, uint32_t imm21);
+  void Bnezc(GpuRegister rs, uint32_t imm21);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -252,32 +289,34 @@
   void Clear(GpuRegister rd);
   void Not(GpuRegister rd, GpuRegister rs);
 
-  // Higher level composite instructions
+  // Higher level composite instructions.
   void LoadConst32(GpuRegister rd, int32_t value);
   void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64
 
-  void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;  // R6
-  void Jump(Label* label) OVERRIDE {
-    B(label);
+  void Bind(Label* label) OVERRIDE {
+    Bind(down_cast<Mips64Label*>(label));
   }
-  void B(Label* label);  // R6
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
-  // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bltzc(GpuRegister rt, Label* label);  // R6
-  void Bgtzc(GpuRegister rt, Label* label);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgezc(GpuRegister rt, Label* label);  // R6
-  void Blezc(GpuRegister rt, Label* label);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqzc(GpuRegister rs, Label* label);  // R6
-  void Bnezc(GpuRegister rs, Label* label);  // R6
+  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
+    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
+  }
+
+  void Bind(Mips64Label* label);
+  void Bc(Mips64Label* label);
+  void Jialc(Mips64Label* label, GpuRegister indirect_reg);
+  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bltzc(GpuRegister rt, Mips64Label* label);
+  void Bgtzc(GpuRegister rt, Mips64Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgezc(GpuRegister rt, Mips64Label* label);
+  void Blezc(GpuRegister rt, Mips64Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
+  void Beqzc(GpuRegister rs, Mips64Label* label);
+  void Bnezc(GpuRegister rs, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -289,43 +328,42 @@
   void Emit(uint32_t value);
 
   //
-  // Overridden common assembler high-level functionality
+  // Overridden common assembler high-level functionality.
   //
 
-  // Emit code that will create an activation on the stack
+  // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size, ManagedRegister method_reg,
                   const std::vector<ManagedRegister>& callee_save_regs,
                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
 
-  // Emit code that will remove an activation from the stack
+  // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
                    const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
 
-  // Store routines
+  // Store routines.
   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
 
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
 
-  void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm,
-                                ManagedRegister mscratch) OVERRIDE;
-
-  void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+  void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
                                   ManagedRegister mscratch) OVERRIDE;
 
-  void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE;
+  void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE;
 
   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
                      ManagedRegister mscratch) OVERRIDE;
 
-  // Load routines
+  // Load routines.
   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
 
-  void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE;
+  void LoadFromThread64(ManagedRegister mdest,
+                        ThreadOffset<kMipsDoublewordSize> src,
+                        size_t size) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
@@ -334,15 +372,16 @@
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
-  void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE;
+  void LoadRawPtrFromThread64(ManagedRegister mdest,
+                              ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE;
 
-  // Copying routines
+  // Copying routines.
   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
 
-  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs,
+  void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs,
                               ManagedRegister mscratch) OVERRIDE;
 
-  void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs,
+  void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs,
                             ManagedRegister mscratch) OVERRIDE;
 
   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
@@ -366,13 +405,13 @@
 
   void MemoryBarrier(ManagedRegister) OVERRIDE;
 
-  // Sign extension
+  // Sign extension.
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Zero extension
+  // Zero extension.
   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
-  // Exploit fast access in managed code to Thread::Current()
+  // Exploit fast access in managed code to Thread::Current().
   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
 
@@ -388,7 +427,7 @@
   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
                               mscratch, bool null_allowed) OVERRIDE;
 
-  // src holds a handle scope entry (Object**) load this into dst
+  // src holds a handle scope entry (Object**); load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
 
   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
@@ -396,39 +435,255 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
 
-  // Call to address held at [base+offset]
+  // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
-  void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE;
+  void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset,
+                        ManagedRegister mscratch) OVERRIDE;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
 
+  // Emit slow paths queued during assembly and promote short branches to long if needed.
+  void FinalizeCode() OVERRIDE;
+
+  // Emit branches and finalize all instructions.
+  void FinalizeInstructions(const MemoryRegion& region);
+
+  // Returns the (always-)current location of a label. In class CodeGeneratorMIPS64 this
+  // must be used instead of Mips64Label::GetPosition().
+  uint32_t GetLabelLocation(Mips64Label* label) const;
+
+  // Get the final position of a label after local fixup based on the old position
+  // recorded before FinalizeCode().
+  uint32_t GetAdjustedPosition(uint32_t old_position);
+
+  enum BranchCondition {
+    kCondLT,
+    kCondGE,
+    kCondLE,
+    kCondGT,
+    kCondLTZ,
+    kCondGEZ,
+    kCondLEZ,
+    kCondGTZ,
+    kCondEQ,
+    kCondNE,
+    kCondEQZ,
+    kCondNEZ,
+    kCondLTU,
+    kCondGEU,
+    kUncond,
+  };
+  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
+
  private:
+  class Branch {
+   public:
+    enum Type {
+      // Short branches.
+      kUncondBranch,
+      kCondBranch,
+      kCall,
+      // Long branches.
+      kLongUncondBranch,
+      kLongCondBranch,
+      kLongCall,
+    };
+
+    // Bit sizes of offsets, defined as an enum to minimize the chance of typos.
+    enum OffsetBits {
+      kOffset16 = 16,
+      kOffset18 = 18,
+      kOffset21 = 21,
+      kOffset23 = 23,
+      kOffset28 = 28,
+      kOffset32 = 32,
+    };
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
+    static constexpr int32_t kMaxBranchLength = 32;
+    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
+
+    struct BranchInfo {
+      // Branch length as a number of 4-byte instructions.
+      uint32_t length;
+      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
+      // PC-relative offset (or its most significant 16-bit half, which goes first).
+      uint32_t instr_offset;
+      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
+      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
+      // instructions) from the instruction containing the offset.
+      uint32_t pc_org;
+      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
+      // an exception: use kOffset23 for beqzc/bnezc).
+      OffsetBits offset_size;
+      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
+      // count.
+      int offset_shift;
+    };
+    static const BranchInfo branch_info_[/* Type */];
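+    // For illustration (hypothetical values; the real table lives in the .cc file):
+    // a short unconditional branch ("bc") could be described by
+    //   { 1, 0, 1, kOffset28, 2 },
+    // i.e. one instruction long, offset held in instruction 0, origin at PC + 4, and a
+    // 26-bit immediate shifted left by 2 for 28 bits of reach.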
+
+    // Unconditional branch.
+    Branch(uint32_t location, uint32_t target);
+    // Conditional branch.
+    Branch(uint32_t location,
+           uint32_t target,
+           BranchCondition condition,
+           GpuRegister lhs_reg,
+           GpuRegister rhs_reg = ZERO);
+    // Call (branch and link) that stores the target address in a given register (typically T9).
+    Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg);
+
+    // Some conditional branches with lhs == rhs are effectively NOPs, while others
+    // are effectively unconditional. Furthermore, MIPS R6 conditional branches require
+    // lhs != rhs. So we need a way to identify such branches in order to emit no
+    // instructions for them or to change them to unconditional branches.
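+    // For example: Beqc(reg, reg, label) always compares equal, making it effectively
+    // unconditional, while Bltc(reg, reg, label) can never be taken, making it
+    // effectively a NOP.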
+    static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+    static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
+
+    static BranchCondition OppositeCondition(BranchCondition cond);
+
+    Type GetType() const;
+    BranchCondition GetCondition() const;
+    GpuRegister GetLeftRegister() const;
+    GpuRegister GetRightRegister() const;
+    uint32_t GetTarget() const;
+    uint32_t GetLocation() const;
+    uint32_t GetOldLocation() const;
+    uint32_t GetLength() const;
+    uint32_t GetOldLength() const;
+    uint32_t GetSize() const;
+    uint32_t GetOldSize() const;
+    uint32_t GetEndLocation() const;
+    uint32_t GetOldEndLocation() const;
+    bool IsLong() const;
+    bool IsResolved() const;
+
+    // Returns the bit size of the signed offset that the branch instruction can handle.
+    OffsetBits GetOffsetSize() const;
+
+    // Calculates the distance between two byte locations in the assembler buffer and
+    // returns the number of bits needed to represent the distance as a signed integer.
+    //
+    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
+    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
+    //
+    // Composite branches (made of several instructions) with longer reach have 32-bit
+    // offsets encoded as two 16-bit halves in two instructions (the high half goes first).
+    // The composite branches cover a range of roughly PC +/- 2GB, but the range is not
+    // end-to-end. Consider, for example, the following implementation of a long
+    // unconditional branch:
+    //
+    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
+    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
+    //
+    // Both of the above instructions take 16-bit signed offsets as immediate operands.
+    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
+    // due to sign extension. This must be compensated for by incrementing offset_31_16
+    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
+    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
+    // Therefore, the long branch range is something like from PC - 0x80000000 to
+    // PC + 0x7FFF7FFF, in other words, shorter by 32KB on one side.
+    //
+    // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
+    // case with the addiu instruction and a 16-bit offset.
+    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
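+
+    // Worked example (illustrative): for a forward distance of 0x12348000 bytes, the
+    // auipc/jic pair above encodes offset_15_0 = 0x8000 and offset_31_16 = 0x1234 + 1 =
+    // 0x1235; jic sign-extends 0x8000 to -0x8000, and (0x1235 << 16) - 0x8000 =
+    // 0x12348000 recovers the distance. The assembler tests below express the same
+    // compensation as:
+    //   offset += (offset & 0x8000) << 1;
+    //   hi = High16Bits(offset);  lo = Low16Bits(offset);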
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target);
+
+    // Relocate the branch by a given delta, if needed, due to the expansion of this or
+    // another branch at a given location by this delta (just changes location_ and target_).
+    void Relocate(uint32_t expand_location, uint32_t delta);
+
+    // If the branch is short, changes its type to long.
+    void PromoteToLong();
+
+    // If necessary, updates the type by promoting a short branch to a long branch
+    // based on the branch location and target. Returns the amount (in bytes) by
+    // which the branch size has increased.
+    // max_short_distance caps the maximum distance between location_ and target_
+    // that is allowed for short branches. This is for debugging/testing purposes.
+    // max_short_distance = 0 forces all short branches to become long.
+    // Use the implicit default argument when not debugging/testing.
+    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
+
+    // Returns the location of the instruction(s) containing the offset.
+    uint32_t GetOffsetLocation() const;
+
+    // Calculates and returns the offset ready for encoding in the branch instruction(s).
+    uint32_t GetOffset() const;
+
+   private:
+    // Completes branch construction by determining and recording its type.
+    void InitializeType(bool is_call);
+    // Helper for the above.
+    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
+
+    uint32_t old_location_;      // Offset into assembler buffer in bytes.
+    uint32_t location_;          // Offset into assembler buffer in bytes.
+    uint32_t target_;            // Offset into assembler buffer in bytes.
+
+    GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
+                                 // indirect call register.
+    GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
+    BranchCondition condition_;  // Condition for conditional branches.
+
+    Type type_;                  // Current type of the branch.
+    Type old_type_;              // Initial type of the branch.
+  };
+  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
+  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
+
   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
   void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
   void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
-  void EmitJ(int opcode, uint32_t addr26);
+  void EmitI26(int opcode, uint32_t imm26);
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
+  void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+
+  void Buncond(Mips64Label* label);
+  void Bcond(Mips64Label* label,
+             BranchCondition condition,
+             GpuRegister lhs,
+             GpuRegister rhs = ZERO);
+  void Call(Mips64Label* label, GpuRegister indirect_reg);
+  void FinalizeLabeledBranch(Mips64Label* label);
+
+  Branch* GetBranch(uint32_t branch_id);
+  const Branch* GetBranch(uint32_t branch_id) const;
+
+  void PromoteBranches();
+  void EmitBranch(Branch* branch);
+  void EmitBranches();
+  void PatchCFI();
+
+  // Emits an exception block.
+  void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
+
+  // List of exception blocks to generate at the end of the code cache.
+  std::vector<Mips64ExceptionSlowPath> exception_blocks_;
+
+  std::vector<Branch> branches_;
+
+  // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
+  bool overwriting_;
+  // The current overwrite location.
+  uint32_t overwrite_location_;
+
+  // Data for GetAdjustedPosition(); see the description there.
+  uint32_t last_position_adjustment_;
+  uint32_t last_old_position_;
+  uint32_t last_branch_id_;
 
   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
 };
 
-// Slowpath entered when Thread::Current()->_exception is non-null
-class Mips64ExceptionSlowPath FINAL : public SlowPath {
- public:
-  Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
-      : scratch_(scratch), stack_adjust_(stack_adjust) {}
-  virtual void Emit(Assembler *sp_asm) OVERRIDE;
- private:
-  const Mips64ManagedRegister scratch_;
-  const size_t stack_adjust_;
-};
-
 }  // namespace mips64
 }  // namespace art
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 4413906..29a5a88 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -24,6 +24,8 @@
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
 
+#define __ GetAssembler()->
+
 namespace art {
 
 struct MIPS64CpuRegisterCompare {
@@ -48,8 +50,26 @@
     return "mips64";
   }
 
+  std::string GetAssemblerCmdName() OVERRIDE {
+    // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details.
+    return "gcc";
+  }
+
   std::string GetAssemblerParameters() OVERRIDE {
-    return " --no-warn -march=mips64r6";
+    // We assemble and link for MIPS64R6. The reason is that object files produced for MIPS64R6
+    // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
+    // branches in the .text section and so they require a relocation pass (there's a relocation
+    // section, .rela.text, that has the needed info to fix up the branches).
+    return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+  }
+
+  void Pad(std::vector<uint8_t>& data) OVERRIDE {
+    // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple
+    // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't
+    // pad, so, in order for two assembler outputs to match, we need to match the padding as well.
+    // NOP is encoded as four zero bytes on MIPS.
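+    // E.g. (illustrative): a 100-byte buffer receives 12 zero bytes of padding, up to 112.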
+    size_t pad_size = RoundUp(data.size(), 16u) - data.size();
+    data.insert(data.end(), pad_size, 0);
   }
 
   std::string GetDisassembleParameters() OVERRIDE {
@@ -182,6 +202,71 @@
     return secondary_register_names_[reg];
   }
 
+  std::string RepeatInsn(size_t count, const std::string& insn) {
+    std::string result;
+    for (; count != 0u; --count) {
+      result += insn;
+    }
+    return result;
+  }
+
+  void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                 mips64::Mips64Label*),
+                              std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A1, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a1, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
+  void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
+                                                                  mips64::GpuRegister,
+                                                                  mips64::Mips64Label*),
+                               std::string instr_name) {
+    mips64::Mips64Label label;
+    (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+    constexpr size_t kAdduCount1 = 63;
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+
+    std::string expected =
+        ".set noreorder\n" +
+        instr_name + " $a0, $a1, 1f\n"
+        "nop\n" +
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        instr_name + " $a2, $a3, 1b\n"
+        "nop\n";
+    DriverStr(expected, instr_name);
+  }
+
  private:
   std::vector<mips64::GpuRegister*> registers_;
   std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
@@ -194,7 +279,6 @@
   EXPECT_TRUE(CheckTools());
 }
 
-
 ///////////////////
 // FP Operations //
 ///////////////////
@@ -348,7 +432,203 @@
 ////////////////
 
 TEST_F(AssemblerMIPS64Test, Jalr) {
-  DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+  DriverStr(".set noreorder\n" +
+            RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
+}
+
+TEST_F(AssemblerMIPS64Test, Jialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
+  std::string expected =
+      ".set noreorder\n"
+      "lapc $t9, 1f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "lapc $t9, 2f\n"
+      "jialc $t9, 0\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "lapc $t9, 1b\n"
+      "jialc $t9, 0\n";
+  DriverStr(expected, "Jialc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongJialc) {
+  mips64::Mips64Label label1, label2;
+  __ Jialc(&label1, mips64::T9);
+  constexpr uint32_t kAdduCount1 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Jialc(&label2, mips64::T9);
+  constexpr uint32_t kAdduCount2 = (1u << 18) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Jialc(&label1, mips64::T9);
+
+  uint32_t offset_forward1 = 3 + kAdduCount1;  // 3: account for auipc, daddiu and jic.
+  offset_forward1 <<= 2;
+  offset_forward1 += (offset_forward1 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_forward2 = 3 + kAdduCount2;  // 3: account for auipc, daddiu and jic.
+  offset_forward2 <<= 2;
+  offset_forward2 += (offset_forward2 & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  uint32_t offset_back = -(3 + kAdduCount2);  // 3: account for auipc, daddiu and jic.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in daddiu.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "1:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+      "jialc $t9, 0\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "2:\n"
+      "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "jialc $t9, 0\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongJialc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc) {
+  mips64::Mips64Label label1, label2;
+  __ Bc(&label1);
+  constexpr size_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label1);
+  __ Bc(&label2);
+  constexpr size_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label2);
+  __ Bc(&label1);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc 1f\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bc 2f\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "bc 1b\n";
+  DriverStr(expected, "Bc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Blezc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgtzc) {
+  BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS64Test, Beqc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bnec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgec) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS64Test, Bltuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS64Test, Bgeuc) {
+  BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS64Test, LongBeqc) {
+  mips64::Mips64Label label;
+  __ Beqc(mips64::A0, mips64::A1, &label);
+  constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Bind(&label);
+  constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+  for (uint32_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+  }
+  __ Beqc(mips64::A2, mips64::A3, &label);
+
+  uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
+  offset_forward <<= 2;
+  offset_forward += (offset_forward & 0x8000) << 1;  // Account for sign extension in jic.
+
+  uint32_t offset_back = -(kAdduCount2 + 1);  // 1: account for bnec.
+  offset_back <<= 2;
+  offset_back += (offset_back & 0x8000) << 1;  // Account for sign extension in jic.
+
+  std::ostringstream oss;
+  oss <<
+      ".set noreorder\n"
+      "bnec $a0, $a1, 1f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+      "1:\n" <<
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+      "2:\n" <<
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+      "bnec $a2, $a3, 3f\n"
+      "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+      "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+      "3:\n";
+  std::string expected = oss.str();
+  DriverStr(expected, "LongBeqc");
 }
 
 //////////
diff --git a/compiler/vector_output_stream.h b/compiler/vector_output_stream.h
index 3c5877c..a3c58d0 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/vector_output_stream.h
@@ -29,9 +29,9 @@
  public:
   VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
 
-  virtual ~VectorOutputStream() {}
+  ~VectorOutputStream() OVERRIDE {}
 
-  bool WriteFully(const void* buffer, size_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
     if (static_cast<size_t>(offset_) == vector_->size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_->insert(vector_->end(), &start[0], &start[byte_count]);
@@ -45,7 +45,11 @@
     return true;
   }
 
-  off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
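+
+  // Usage sketch (illustrative only): writes land directly in the backing vector,
+  // which is why Flush() can simply return true:
+  //   std::vector<uint8_t> buffer;
+  //   VectorOutputStream out("oat data", &buffer);
+  //   out.WriteFully("oat", 3u);  // buffer now holds 'o', 'a', 't'.
+  //   out.Flush();                // No-op; nothing is buffered elsewhere.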
 
  private:
   void EnsureCapacity(off_t new_offset) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2653807..77211ce 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -55,8 +55,10 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -494,6 +496,7 @@
       app_image_(false),
       boot_image_(false),
       is_host_(false),
+      image_writer_(nullptr),
       driver_(nullptr),
       dump_stats_(false),
       dump_passes_(false),
@@ -1231,6 +1234,7 @@
 
     // Handle and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = nullptr;
+    jobject class_path_class_loader = nullptr;
     Thread* self = Thread::Current();
 
     if (!boot_image_option_.empty()) {
@@ -1248,10 +1252,12 @@
       key_value_store_->Put(OatHeader::kClassPathKey,
                             OatFile::EncodeDexFileDependencies(class_path_files));
 
-      // Then the dex files we'll compile. Thus we'll resolve the class-path first.
-      class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end());
+      class_path_class_loader = class_linker->CreatePathClassLoader(self,
+                                                                    class_path_files,
+                                                                    nullptr);
 
-      class_loader = class_linker->CreatePathClassLoader(self, class_path_files);
+      // Class path loader as parent so that we'll resolve there first.
+      class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader);
     }
 
     driver_.reset(new CompilerDriver(compiler_options_.get(),
@@ -1355,11 +1361,22 @@
       uint32_t image_file_location_oat_checksum = 0;
       uintptr_t image_file_location_oat_data_begin = 0;
       int32_t image_patch_delta = 0;
+
+      if (app_image_ && image_base_ == 0) {
+        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
+        image_base_ = RoundUp(
+            reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
+            kPageSize);
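+        // E.g. (illustrative): an oat file ending at 0x71001234 is rounded up to an app
+        // image base of 0x71002000, assuming 4 KiB pages.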
+        VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
+      }
+
       if (IsImage()) {
         PrepareImageWriter(image_base_);
-      } else {
+      }
+
+      if (!IsBootImage()) {
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
-        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
         image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
         image_file_location_oat_data_begin =
             reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
@@ -1371,11 +1388,13 @@
         key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
       }
 
-      oat_writer.reset(new OatWriter(dex_files_, image_file_location_oat_checksum,
+      oat_writer.reset(new OatWriter(dex_files_,
+                                     image_file_location_oat_checksum,
                                      image_file_location_oat_data_begin,
                                      image_patch_delta,
                                      driver_.get(),
                                      image_writer_.get(),
+                                     IsBootImage(),
                                      timings_,
                                      key_value_store_.get()));
     }
@@ -1392,8 +1411,36 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
+      std::unique_ptr<ElfWriter> elf_writer =
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      if (!oat_writer->WriteRodata(rodata)) {
+        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      if (!oat_writer->WriteCode(text)) {
+        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer->GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer->GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer->GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
         return false;
       }
@@ -1591,7 +1638,11 @@
   }
 
   void PrepareImageWriter(uintptr_t image_base) {
-    image_writer_.reset(new ImageWriter(*driver_, image_base, compiler_options_->GetCompilePic()));
+    DCHECK(IsImage());
+    image_writer_.reset(new ImageWriter(*driver_,
+                                        image_base,
+                                        compiler_options_->GetCompilePic(),
+                                        IsAppImage()));
   }
 
   // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 2466f33..dd1002c 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -40,11 +40,12 @@
  */
 static void usage(void) {
   fprintf(stderr, "Copyright (C) 2007 The Android Open Source Project\n\n");
-  fprintf(stderr, "%s: [-c] [-d] [-f] [-h] [-i] [-l layout] [-o outfile]"
+  fprintf(stderr, "%s: [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile]"
                   " [-t tempfile] dexfile...\n", gProgName);
   fprintf(stderr, "\n");
   fprintf(stderr, " -c : verify checksum and exit\n");
   fprintf(stderr, " -d : disassemble code sections\n");
+  fprintf(stderr, " -e : display exported items only\n");
   fprintf(stderr, " -f : display summary information from file header\n");
   fprintf(stderr, " -g : dump CFG for dex\n");
   fprintf(stderr, " -h : display file header details\n");
@@ -69,7 +70,7 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "cdfghil:t:o:");
+    const int ic = getopt(argc, argv, "cdefghil:t:o:");
     if (ic < 0) {
       break;  // done
     }
@@ -80,6 +81,9 @@
       case 'd':  // disassemble Dalvik instructions
         gOptions.disassemble = true;
         break;
+      case 'e':  // exported items only
+        gOptions.exportsOnly = true;
+        break;
       case 'f':  // dump outer file header
         gOptions.showFileHeaders = true;
         break;
@@ -98,7 +102,6 @@
         } else if (strcmp(optarg, "xml") == 0) {
           gOptions.outputFormat = OUTPUT_XML;
           gOptions.verbose = false;
-          gOptions.exportsOnly = true;
         } else {
           wantUsage = true;
         }
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 304d4e5..5e71053 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -814,7 +814,7 @@
 
   static const ImageHeader& GetBootImageHeader() {
     gc::Heap* heap = Runtime::Current()->GetHeap();
-    gc::space::ImageSpace* image_space = heap->GetImageSpace();
+    gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
     CHECK(image_space != nullptr);
     const ImageHeader& image_header = image_space->GetImageHeader();
     return image_header;
@@ -838,7 +838,7 @@
                      std::ostream* os, pid_t image_diff_pid) {
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
   CHECK(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
   if (!image_header.IsValid()) {
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 2c18190..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -47,7 +47,7 @@
     CommonRuntimeTest::SetUp();
 
     // We loaded the runtime with an explicit image. Therefore the image space must exist.
-    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
     ASSERT_TRUE(image_space != nullptr);
     boot_image_location_ = image_space->GetImageLocation();
   }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index ea61b43..d20f7d5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,6 +28,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -98,33 +99,6 @@
 
 class OatSymbolizer FINAL {
  public:
-  class RodataWriter FINAL : public CodeOutput {
-   public:
-    explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      return out->WriteFully(oat_file_->Begin(), rodata_size);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
-  class TextWriter FINAL : public CodeOutput {
-   public:
-    explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
-      return out->WriteFully(text_begin, oat_file_->End() - text_begin);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
   OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
       oat_file_(oat_file), builder_(nullptr),
       output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
@@ -139,31 +113,57 @@
                                           uint32_t);
 
   bool Symbolize() {
-    Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-    uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
-    uint32_t text_size = size - rodata_size;
-    uint32_t bss_size = oat_file_->BssSize();
-    RodataWriter rodata_writer(oat_file_);
-    TextWriter text_writer(oat_file_);
-    builder_.reset(new ElfBuilder<ElfTypes32>(
-        oat_file_->GetOatHeader().GetInstructionSet(),
-        rodata_size, &rodata_writer,
-        text_size, &text_writer,
-        bss_size));
+    const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
+
+    File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
+    std::unique_ptr<BufferedOutputStream> output_stream(
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
+    builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
+
+    builder_->Start();
+
+    auto* rodata = builder_->GetRoData();
+    auto* text = builder_->GetText();
+    auto* bss = builder_->GetBss();
+    auto* strtab = builder_->GetStrTab();
+    auto* symtab = builder_->GetSymTab();
+
+    rodata->Start();
+    const uint8_t* rodata_begin = oat_file_->Begin();
+    const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+    rodata->WriteFully(rodata_begin, rodata_size);
+    rodata->End();
+
+    text->Start();
+    const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
+    const size_t text_size = oat_file_->End() - text_begin;
+    text->WriteFully(text_begin, text_size);
+    text->End();
+
+    if (oat_file_->BssSize() != 0) {
+      bss->Start();
+      bss->SetSize(oat_file_->BssSize());
+      bss->End();
+    }
+
+    builder_->WriteDynamicSection(elf_file->GetPath());
 
     Walk(&art::OatSymbolizer::RegisterForDedup);
 
     NormalizeState();
 
+    strtab->Start();
+    strtab->Write("");  // strtab should start with an empty string.
     Walk(&art::OatSymbolizer::AddSymbol);
+    strtab->End();
 
-    File* elf_output = OS::CreateEmptyFile(output_name_.c_str());
-    bool result = builder_->Write(elf_output);
+    symtab->Start();
+    symtab->Write();
+    symtab->End();
 
-    // Ignore I/O errors.
-    UNUSED(elf_output->FlushClose());
+    builder_->End();
 
-    return result;
+    return builder_->Good();
   }
 
   void Walk(Callback callback) {
@@ -295,9 +295,8 @@
         pretty_name = "[Dedup]" + pretty_name;
       }
 
-      auto* symtab = builder_->GetSymtab();
-
-      symtab->AddSymbol(pretty_name, builder_->GetText(),
+      int name_offset = builder_->GetStrTab()->Write(pretty_name);
+      builder_->GetSymTab()->Add(name_offset, builder_->GetText(),
           oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
           true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
     }
@@ -1620,9 +1619,9 @@
       dex_caches_.clear();
       {
         ReaderMutexLock mu(self, *class_linker->DexLock());
-        for (jobject weak_root : class_linker->GetDexCaches()) {
+        for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
           mirror::DexCache* dex_cache =
-              down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+              down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
           if (dex_cache != nullptr) {
             dex_caches_.insert(dex_cache);
           }
@@ -2376,7 +2375,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
   gc::Heap* heap = runtime->GetHeap();
-  gc::space::ImageSpace* image_space = heap->GetImageSpace();
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
   CHECK(image_space != nullptr);
   const ImageHeader& image_header = image_space->GetImageHeader();
   if (!image_header.IsValid()) {
@@ -2414,7 +2413,7 @@
 
   // Need a class loader.
   // Fake that we're a compiler.
-  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path);
+  jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr);
 
   // Use the class loader while dumping.
   StackHandleScope<1> scope(self);
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 88622cc..3d9f7dc 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -177,18 +177,21 @@
   t.NewTiming("Image and oat Patching setup");
   // Create the map where we will write the image patches to.
   std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len, PROT_READ | PROT_WRITE, MAP_PRIVATE,
-                                                input_image->Fd(), 0,
+  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
+                                                PROT_READ | PROT_WRITE,
+                                                MAP_PRIVATE,
+                                                input_image->Fd(),
+                                                0,
+                                                /*low_4gb*/false,
                                                 input_image->GetPath().c_str(),
                                                 &error_msg));
   if (image.get() == nullptr) {
     LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
     return false;
   }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetImageSpace();
+  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetBootImageSpace();
 
-  PatchOat p(isa, image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
-             delta, timings);
+  PatchOat p(isa, image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(), delta, timings);
   t.NewTiming("Patching files");
   if (!p.PatchImage()) {
     LOG(ERROR) << "Failed to patch image file " << input_image->GetPath();
@@ -273,15 +276,19 @@
   t.NewTiming("Image and oat Patching setup");
   // Create the map where we will write the image patches to.
   std::string error_msg;
-  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len, PROT_READ | PROT_WRITE, MAP_PRIVATE,
-                                                input_image->Fd(), 0,
+  std::unique_ptr<MemMap> image(MemMap::MapFile(image_len,
+                                                PROT_READ | PROT_WRITE,
+                                                MAP_PRIVATE,
+                                                input_image->Fd(),
+                                                0,
+                                                /*low_4gb*/false,
                                                 input_image->GetPath().c_str(),
                                                 &error_msg));
   if (image.get() == nullptr) {
     LOG(ERROR) << "unable to map image file " << input_image->GetPath() << " : " << error_msg;
     return false;
   }
-  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetImageSpace();
+  gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetBootImageSpace();
 
   std::unique_ptr<ElfFile> elf(ElfFile::Open(input_oat,
                                              PROT_READ | PROT_WRITE, MAP_PRIVATE, &error_msg));
@@ -519,13 +526,26 @@
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
 
+class RelocatedPointerVisitor {
+ public:
+  explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
+
+  template <typename T>
+  T* operator()(T* ptr) const {
+    return patch_oat_->RelocatedAddressOfPointer(ptr);
+  }
+
+ private:
+  PatchOat* const patch_oat_;
+};
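+
+// Usage sketch (illustrative): any callable mapping T* to T* satisfies the same contract,
+// so the fixup helpers used below can be driven like this:
+//   RelocatedPointerVisitor visitor(this);
+//   orig_dex_cache->FixupStrings(RelocatedCopyOf(orig_strings), visitor);
+//   vtable->Fixup(RelocatedCopyOf(vtable), pointer_size, visitor);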
+
 void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) {
   auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>(
       img_roots->Get(ImageHeader::kDexCaches));
+  const size_t pointer_size = InstructionSetPointerSize(isa_);
   for (size_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
     auto* orig_dex_cache = dex_caches->GetWithoutChecks(i);
     auto* copy_dex_cache = RelocatedCopyOf(orig_dex_cache);
-    const size_t pointer_size = InstructionSetPointerSize(isa_);
     // Though the DexCache array fields are usually treated as native pointers, we set the full
     // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
     // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
@@ -536,10 +556,7 @@
         mirror::DexCache::StringsOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_strings)));
     if (orig_strings != nullptr) {
-      GcRoot<mirror::String>* copy_strings = RelocatedCopyOf(orig_strings);
-      for (size_t j = 0, num = orig_dex_cache->NumStrings(); j != num; ++j) {
-        copy_strings[j] = GcRoot<mirror::String>(RelocatedAddressOfPointer(orig_strings[j].Read()));
-      }
+      orig_dex_cache->FixupStrings(RelocatedCopyOf(orig_strings), RelocatedPointerVisitor(this));
     }
     GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes();
     GcRoot<mirror::Class>* relocated_types = RelocatedAddressOfPointer(orig_types);
@@ -547,10 +564,8 @@
         mirror::DexCache::ResolvedTypesOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_types)));
     if (orig_types != nullptr) {
-      GcRoot<mirror::Class>* copy_types = RelocatedCopyOf(orig_types);
-      for (size_t j = 0, num = orig_dex_cache->NumResolvedTypes(); j != num; ++j) {
-        copy_types[j] = GcRoot<mirror::Class>(RelocatedAddressOfPointer(orig_types[j].Read()));
-      }
+      orig_dex_cache->FixupResolvedTypes(RelocatedCopyOf(orig_types),
+                                         RelocatedPointerVisitor(this));
     }
     ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
     ArtMethod** relocated_methods = RelocatedAddressOfPointer(orig_methods);
@@ -581,25 +596,6 @@
   }
 }
 
-void PatchOat::FixupNativePointerArray(mirror::PointerArray* object) {
-  if (object->IsIntArray()) {
-    mirror::IntArray* arr = object->AsIntArray();
-    mirror::IntArray* copy_arr = down_cast<mirror::IntArray*>(RelocatedCopyOf(arr));
-    for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
-      copy_arr->SetWithoutChecks<false>(
-          j, RelocatedAddressOfIntPointer(arr->GetWithoutChecks(j)));
-    }
-  } else {
-    CHECK(object->IsLongArray());
-    mirror::LongArray* arr = object->AsLongArray();
-    mirror::LongArray* copy_arr = down_cast<mirror::LongArray*>(RelocatedCopyOf(arr));
-    for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
-      copy_arr->SetWithoutChecks<false>(
-          j, RelocatedAddressOfIntPointer(arr->GetWithoutChecks(j)));
-    }
-  }
-}
-
 bool PatchOat::PatchImage() {
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   CHECK_GT(image_->Size(), sizeof(ImageHeader));
@@ -667,17 +663,14 @@
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
   if (object->IsClass<kVerifyNone>()) {
-    auto* klass = object->AsClass();
-    auto* copy_klass = down_cast<mirror::Class*>(copy);
-    copy_klass->SetDexCacheStrings(RelocatedAddressOfPointer(klass->GetDexCacheStrings()));
-    copy_klass->SetSFieldsPtrUnchecked(RelocatedAddressOfPointer(klass->GetSFieldsPtr()));
-    copy_klass->SetIFieldsPtrUnchecked(RelocatedAddressOfPointer(klass->GetIFieldsPtr()));
-    copy_klass->SetDirectMethodsPtrUnchecked(
-        RelocatedAddressOfPointer(klass->GetDirectMethodsPtr()));
-    copy_klass->SetVirtualMethodsPtr(RelocatedAddressOfPointer(klass->GetVirtualMethodsPtr()));
+    const size_t pointer_size = InstructionSetPointerSize(isa_);
+    mirror::Class* klass = object->AsClass();
+    mirror::Class* copy_klass = down_cast<mirror::Class*>(copy);
+    RelocatedPointerVisitor native_visitor(this);
+    klass->FixupNativePointers(copy_klass, pointer_size, native_visitor);
     auto* vtable = klass->GetVTable();
     if (vtable != nullptr) {
-      FixupNativePointerArray(vtable);
+      vtable->Fixup(RelocatedCopyOf(vtable), pointer_size, native_visitor);
     }
     auto* iftable = klass->GetIfTable();
     if (iftable != nullptr) {
@@ -685,24 +678,12 @@
         if (iftable->GetMethodArrayCount(i) > 0) {
           auto* method_array = iftable->GetMethodArray(i);
           CHECK(method_array != nullptr);
-          FixupNativePointerArray(method_array);
+          method_array->Fixup(RelocatedCopyOf(method_array), pointer_size, native_visitor);
         }
       }
     }
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      const size_t pointer_size = InstructionSetPointerSize(isa_);
-      for (int32_t i = 0; i < klass->GetEmbeddedVTableLength(); ++i) {
-        copy_klass->SetEmbeddedVTableEntryUnchecked(i, RelocatedAddressOfPointer(
-            klass->GetEmbeddedVTableEntry(i, pointer_size)), pointer_size);
-      }
-      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-        copy_klass->SetEmbeddedImTableEntry(i, RelocatedAddressOfPointer(
-            klass->GetEmbeddedImTableEntry(i, pointer_size)), pointer_size);
-      }
-    }
-  }
-  if (object->GetClass() == mirror::Method::StaticClass() ||
-      object->GetClass() == mirror::Constructor::StaticClass()) {
+  } else if (object->GetClass() == mirror::Method::StaticClass() ||
+             object->GetClass() == mirror::Constructor::StaticClass()) {
     // Need to go update the ArtMethod.
     auto* dest = down_cast<mirror::AbstractMethod*>(copy);
     auto* src = down_cast<mirror::AbstractMethod*>(object);
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 87ecc61..0915014 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -102,8 +102,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
   void FixupMethod(ArtMethod* object, ArtMethod* copy)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void FixupNativePointerArray(mirror::PointerArray* object)
-      SHARED_REQUIRES(Locks::mutator_lock_);
   bool InHeap(mirror::Object*);
 
   // Patches oat in place, modifying the oat_file given to the constructor.
@@ -200,6 +198,7 @@
   TimingLogger* timings_;
 
   friend class FixupRootVisitor;
+  friend class RelocatedPointerVisitor;
   friend class PatchOatArtFieldVisitor;
   friend class PatchOatArtMethodVisitor;
   DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat);
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 1fdffe3..571a2f5 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -60,6 +60,7 @@
   gc/collector/concurrent_copying.cc \
   gc/collector/garbage_collector.cc \
   gc/collector/immune_region.cc \
+  gc/collector/immune_spaces.cc \
   gc/collector/mark_compact.cc \
   gc/collector/mark_sweep.cc \
   gc/collector/partial_mark_sweep.cc \
@@ -102,6 +103,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 76c7c4f..be33b0e 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -164,9 +164,10 @@
   // Deoptimization from compiled code.
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 371cbb2..63285a4 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -147,9 +147,10 @@
   // Deoptimization from compiled code.
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index 395cee8..613bb5c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -39,7 +39,7 @@
   if (!needs_a53_835769_fix) {
     // Check to see if this is an expected variant.
     static const char* arm64_known_variants[] = {
-        "denver64"
+        "denver64", "kryo"
     };
     if (!FindVariantInArray(arm64_known_variants, arraysize(arm64_known_variants), variant)) {
       std::ostringstream os;
diff --git a/runtime/arch/mips/entrypoints_direct_mips.h b/runtime/arch/mips/entrypoints_direct_mips.h
index f9c5315..74e7638 100644
--- a/runtime/arch/mips/entrypoints_direct_mips.h
+++ b/runtime/arch/mips/entrypoints_direct_mips.h
@@ -45,7 +45,8 @@
       entrypoint == kQuickCmpgFloat ||
       entrypoint == kQuickCmplDouble ||
       entrypoint == kQuickCmplFloat ||
-      entrypoint == kQuickReadBarrierSlow;
+      entrypoint == kQuickReadBarrierSlow ||
+      entrypoint == kQuickReadBarrierForRootSlow;
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 59421dd..cba427d 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -271,10 +271,14 @@
   qpoints->pA64Store = QuasiAtomic::Write64;
   static_assert(IsDirectEntrypoint(kQuickA64Store), "Non-direct C stub marked direct.");
 
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierJni), "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
+  static_assert(IsDirectEntrypoint(kQuickReadBarrierForRootSlow),
+                "Direct C stub not marked direct.");
 };
 
 }  // namespace art
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 0f784ed..1096af0 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -59,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   FP   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register.
+  TMP  = T8,  // Scratch register (in addition to AT).
   kNumberOfCoreRegisters = 32,
   kNoRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 417d5fc..89f54dd 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -178,9 +178,10 @@
   qpoints->pA64Load = QuasiAtomic::Read64;
   qpoints->pA64Store = QuasiAtomic::Write64;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
+  qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 1d10e5d..8746bad 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -431,9 +431,9 @@
           [referrer] "r"(referrer), [hidden] "r"(hidden)
         : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
           "t8", "t9", "k0", "k1", "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__)
 #define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t"
@@ -2151,4 +2151,39 @@
 #endif
 }
 
+TEST_F(StubTest, ReadBarrierForRoot) {
+#if defined(ART_USE_READ_BARRIER) && (defined(__i386__) || defined(__arm__) || \
+      defined(__aarch64__) || defined(__mips__) || (defined(__x86_64__) && !defined(__APPLE__)))
+  Thread* self = Thread::Current();
+
+  const uintptr_t readBarrierForRootSlow =
+      StubTest::GetEntrypoint(self, kQuickReadBarrierForRootSlow);
+
+  // Create an object.
+  ScopedObjectAccess soa(self);
+  // Garbage is created during ClassLinker::Init.
+
+  StackHandleScope<1> hs(soa.Self());
+
+  Handle<mirror::String> obj(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello, world!")));
+
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  GcRoot<mirror::Class>& root = mirror::String::java_lang_String_;
+  size_t result = Invoke3(reinterpret_cast<size_t>(&root), 0U, 0U, readBarrierForRootSlow, self);
+
+  EXPECT_FALSE(self->IsExceptionPending());
+  EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
+  mirror::Class* klass = reinterpret_cast<mirror::Class*>(result);
+  EXPECT_EQ(klass, obj->GetClass());
+
+  // Tests done.
+#else
+  LOG(INFO) << "Skipping read_barrier_for_root_slow";
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping read_barrier_for_root_slow" << std::endl;
+#endif
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 019546f..e200018 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -29,6 +29,7 @@
 
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
@@ -136,9 +137,10 @@
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
+  qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 };
 
 }  // namespace art
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2f485ae..463c9cf 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -788,7 +788,106 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // eax: uint32_t type_idx/return value, ecx: ArtMethod*
+    // ebx, edx: free
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx  // Load dex cache resolved types array
+                                                        // Load the class (edx)
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    testl %edx, %edx                                    // Check null class
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // Check class status
+    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%edx)
+    jne  .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // No fake dependence needed on x86
+                                                        // between status and flags load,
+                                                        // since each load is a load-acquire;
+                                                        // loads are not reordered.
+                                                        // Check access flags has
+                                                        // kAccClassIsFinalizable
+    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%edx)
+    jnz   .Lart_quick_alloc_object_rosalloc_slow_path
+
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+                                                        // Check if the thread local allocation
+                                                        // stack has room
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi
+    jae  .Lart_quick_alloc_object_rosalloc_slow_path
+
+    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%edx), %edi    // Load the object size (edi)
+                                                        // Check if the size is for a thread
+                                                        // local allocation
+    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi
+    ja   .Lart_quick_alloc_object_rosalloc_slow_path
+    decl %edi
+    shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index
+                                                            // from object size.
+                                                            // Align the size up to the
+                                                            // rosalloc bracket quantum size,
+                                                            // divide by the quantum size, and
+                                                            // subtract 1. This code is a
+                                                            // shorter but equivalent version.
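+                                                            // Illustrative check (a sketch,
+                                                            // assuming a 16-byte quantum,
+                                                            // i.e. shift 4): for size 24,
+                                                            // (24-1)>>4 = 1, which matches
+                                                            // RoundUp(24,16)/16 - 1 = 1.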
+                                                        // Load thread local rosalloc run (ebx)
+    movl THREAD_ROSALLOC_RUNS_OFFSET(%ebx, %edi, __SIZEOF_POINTER__), %ebx
+                                                        // Load free_list head (edi),
+                                                        // this will be the return value.
+    movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi
+    test %edi, %edi
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
+                                                        // Point of no slow path. Won't go to
+                                                        // the slow path from here on. Ok to
+                                                        // clobber eax and ecx.
+    movl %edi, %eax
+                                                        // Load the next pointer of the head
+                                                        // and update head of free list with
+                                                        // next pointer
+    movl ROSALLOC_SLOT_NEXT_OFFSET(%eax), %edi
+    movl %edi, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx)
+                                                        // Decrement size of free list by 1
+    decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%ebx)
+                                                        // Store the class pointer in the
+                                                        // header. This also overwrites the
+                                                        // next pointer. The offsets are
+                                                        // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
+    movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
+                                                        // Push the new object onto the thread
+                                                        // local allocation stack and
+                                                        // increment the thread local
+                                                        // allocation stack top.
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    movl %eax, (%edi)
+    addl LITERAL(COMPRESSED_REFERENCE_SIZE), %edi
+    movl %edi, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx)
+                                                        // No fence needed for x86.
+    POP edi
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    POP edi
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME ebx, ebx // save ref containing registers for GC
+    // Outgoing argument set up
+    PUSH eax                      // alignment padding
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
+    CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
+    PUSH eax
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)  // cxx_name(arg0, arg1, Thread*)
+    addl LITERAL(16), %esp        // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME          // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
+END_FUNCTION art_quick_alloc_object_rosalloc
+
+
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 
 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1588,14 +1687,22 @@
 END_FUNCTION art_nested_signal_return
 
 DEFINE_FUNCTION art_quick_read_barrier_slow
-    PUSH edx                        // pass arg3 - offset
-    PUSH ecx                        // pass arg2 - obj
-    PUSH eax                        // pass arg1 - ref
-    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
-    addl LITERAL(12), %esp          // pop arguments
+    PUSH edx                         // pass arg3 - offset
+    PUSH ecx                         // pass arg2 - obj
+    PUSH eax                         // pass arg1 - ref
+    call SYMBOL(artReadBarrierSlow)  // artReadBarrierSlow(ref, obj, offset)
+    addl LITERAL(12), %esp           // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_read_barrier_slow
 
+DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    PUSH eax                                // pass arg1 - root
+    call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
+    addl LITERAL(4), %esp                   // pop argument
+    CFI_ADJUST_CFA_OFFSET(-4)
+    ret
+END_FUNCTION art_quick_read_barrier_for_root_slow
+
     // TODO: implement these!
 UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index eae09ee..2b38c9d 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -30,6 +30,7 @@
 
 // Read barrier entrypoints.
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
+extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
@@ -140,9 +141,10 @@
   // Deoptimize
   qpoints->pDeoptimize = art_quick_deoptimize_from_compiled_code;
 
-  // Read barrier
+  // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   qpoints->pReadBarrierSlow = art_quick_read_barrier_slow;
+  qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow;
 #endif  // __APPLE__
 };
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 95f0ccb..17d277e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -809,7 +809,90 @@
 
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
+    // Fast path rosalloc allocation.
+    // RDI: type_idx, RSI: ArtMethod*, RAX: return value
+    // RDX, RCX, R8, R9: free.
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx   // Load dex cache resolved types array
+                                                              // Load the class (edx)
+    movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    testl  %edx, %edx                                         // Check null class
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Check class status.
+    cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
+    jne    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // We don't need a fence (between
+                                                              // the status and the access flag
+                                                              // loads) here because every load
+                                                              // is a load acquire on x86.
+                                                              // Check access flags has
+                                                              // kAccClassIsFinalizable
+    testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
+    jnz    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Check if the thread local
+                                                              // allocation stack has room.
+    movq   %gs:THREAD_SELF_OFFSET, %r8                        // r8 = thread
+    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx     // rcx = alloc stack top.
+    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
+    jae    .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Load the object size
+    movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
+                                                              // Check if the size is for a thread
+                                                              // local allocation
+    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
+    ja     .Lart_quick_alloc_object_rosalloc_slow_path
+                                                              // Compute the rosalloc bracket index
+                                                              // from the size.
+                                                              // Align the size up to the rosalloc
+                                                              // bracket quantum size, divide by
+                                                              // the quantum size, and subtract 1.
+                                                              // This code is a shorter but
+                                                              // equivalent version.
+    subq   LITERAL(1), %rax
+    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
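+                                                              // Sketch of the equivalence:
+                                                              // for n >= 1 and Q = 1 << S,
+                                                              // RoundUp(n,Q)/Q - 1 == (n-1)>>S.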
+                                                              // Load the rosalloc run (r9)
+    movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
+                                                              // Load the free list head (rax). This
+                                                              // will be the return val.
+    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
+    testq  %rax, %rax
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
+                                                              // Push the new object onto the thread
+                                                              // local allocation stack and
+                                                              // increment the thread local
+                                                              // allocation stack top.
+    movl   %eax, (%rcx)
+    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
+    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
+                                                              // Load the next pointer of the head
+                                                              // and update the list head with the
+                                                              // next pointer.
+    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
+    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
+                                                              // Store the class pointer in the
+                                                              // header. This also overwrites the
+                                                              // next pointer. The offsets are
+                                                              // asserted to match.
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+    POISON_HEAP_REF edx
+    movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+                                                              // Decrement the size of the free list
+    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
+                                                              // No fence necessary for x86.
+    ret
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save ref containing registers for GC
+    // Outgoing argument set up
+    movq %gs:THREAD_SELF_OFFSET, %rdx                         // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)               // cxx_name(arg0, arg1, Thread*)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                       // restore frame up to return address
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
+END_FUNCTION art_quick_alloc_object_rosalloc
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
 DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
@@ -1639,3 +1722,14 @@
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
 END_FUNCTION art_quick_read_barrier_slow
+
+DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
+    SETUP_FP_CALLEE_SAVE_FRAME
+    subq LITERAL(8), %rsp                  // Alignment padding.
+    CFI_ADJUST_CFA_OFFSET(8)
+    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
+    addq LITERAL(8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_read_barrier_for_root_slow
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f4a5f23..47f2569 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -67,6 +67,18 @@
                                                              dex_cache);
 }
 
+void ArtMethod::ThrowInvocationTimeError() {
+  DCHECK(!IsInvokable());
+  // NOTE: IsDefaultConflicting must be first since the actual method might or might not be abstract
+  //       due to the way we select it.
+  if (IsDefaultConflicting()) {
+    ThrowIncompatibleClassChangeErrorForMethodConflict(this);
+  } else {
+    DCHECK(IsAbstract());
+    ThrowAbstractMethodError(this);
+  }
+}
+
 InvokeType ArtMethod::GetInvokeType() {
   // TODO: kSuper?
   if (GetDeclaringClass()->IsInterface()) {
@@ -94,17 +106,16 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
@@ -137,8 +148,7 @@
         mirror::Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
           ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
+          if (HasSameNameAndSignature(interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
             result = interface_method;
             break;
           }
@@ -146,8 +156,9 @@
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(sizeof(void*))->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(sizeof(void*))));
   return result;
 }
 
@@ -330,6 +341,10 @@
   RegisterNative(GetJniDlsymLookupStub(), false);
 }
 
+bool ArtMethod::IsOverridableByDefaultMethod() {
+  return GetDeclaringClass()->IsInterface();
+}
+
 bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> params) {
   auto* dex_cache = GetDexCache();
   auto* dex_file = dex_cache->GetDexFile();
@@ -367,6 +382,10 @@
 }
 
 const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
+  // Our callers should make sure they don't pass the instrumentation exit pc,
+  // as this method does not look at the side instrumentation stack.
+  DCHECK_NE(pc, reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()));
+
   if (IsRuntimeMethod()) {
     return nullptr;
   }
@@ -434,7 +453,7 @@
   }
   const void* oat_entry_point = oat_method.GetQuickCode();
   if (oat_entry_point == nullptr || class_linker->IsQuickGenericJniStub(oat_entry_point)) {
-    DCHECK(IsNative());
+    DCHECK(IsNative()) << PrettyMethod(this);
     return nullptr;
   }
 
@@ -445,12 +464,6 @@
     return method_header;
   }
 
-  if (pc == reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc())) {
-    // If we're instrumenting, just return the compiled OAT code.
-    // TODO(ngeoffray): Avoid this call path.
-    return method_header;
-  }
-
   DCHECK(method_header->Contains(pc))
       << PrettyMethod(this)
       << std::hex << pc << " " << oat_entry_point
@@ -458,6 +471,16 @@
   return method_header;
 }
 
+bool ArtMethod::HasAnyCompiledCode() {
+  // Check whether the JIT has compiled it.
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsMethod(this)) {
+    return true;
+  }
+
+  // Check whether we have AOT code.
+  return Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(this) != nullptr;
+}
 
 void ArtMethod::CopyFrom(ArtMethod* src, size_t image_pointer_size) {
   memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index ce9f202..8efad88 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -136,6 +136,19 @@
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
+  // Returns true if invoking this method will not throw an AbstractMethodError or
+  // IncompatibleClassChangeError.
+  bool IsInvokable() {
+    return !IsAbstract() && !IsDefaultConflicting();
+  }
+
+  // A default conflict method is a special sentinel method that stands for a conflict between
+  // multiple default methods. It cannot be invoked, throwing an IncompatibleClassChangeError if one
+  // attempts to do so.
+  bool IsDefaultConflicting() {
+    return (GetAccessFlags() & kAccDefaultConflict) != 0u;
+  }
+
   // This is set by the class linker.
   bool IsDefault() {
     return (GetAccessFlags() & kAccDefault) != 0;
@@ -170,12 +183,14 @@
   }
 
   // Returns true if this method could be overridden by a default method.
-  bool IsOverridableByDefaultMethod() {
-    return IsDefault() || IsAbstract();
-  }
+  bool IsOverridableByDefaultMethod() SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Throws the error that would result from trying to invoke this method (i.e.
+  // IncompatibleClassChangeError or AbstractMethodError). Only call if !IsInvokable().
+  void ThrowInvocationTimeError() SHARED_REQUIRES(Locks::mutator_lock_);
+
   uint16_t GetMethodIndex() SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Doesn't do erroneous / unresolved class checks.
@@ -248,6 +263,9 @@
   mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature of the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -454,6 +472,9 @@
   const OatQuickMethodHeader* GetOatQuickMethodHeader(uintptr_t pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns whether the method has any compiled code, JIT or AOT.
+  bool HasAnyCompiledCode() SHARED_REQUIRES(Locks::mutator_lock_);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 69f6fe9..b548dfb 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -122,7 +122,7 @@
             art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
 
 // Offset of field Thread::tlsPtr_.thread_local_pos.
-#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 150 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_POS_OFFSET (THREAD_CARD_TABLE_OFFSET + 151 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET,
             art::Thread::ThreadLocalPosOffset<__SIZEOF_POINTER__>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_end.
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 71afa0f..771b2d0 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -316,22 +316,22 @@
 }
 
 void* ArenaAllocator::AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
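+  // For example (assuming kMemoryToolRedZoneBytes == 8): a 5-byte allocation
+  // reserves RoundUp(5 + 8, 8) == 16 bytes; only [ret, ret + 5) is marked
+  // defined, and the remaining 11 bytes stay inaccessible as the red zone.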
   size_t rounded_bytes = RoundUp(bytes + kMemoryToolRedZoneBytes, 8);
   if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
     // Obtain a new block.
     ObtainNewArenaForAllocation(rounded_bytes);
     CHECK(ptr_ != nullptr);
-    MEMORY_TOOL_MAKE_UNDEFINED(ptr_, end_ - ptr_);
+    MEMORY_TOOL_MAKE_NOACCESS(ptr_, end_ - ptr_);
   }
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
   uint8_t* ret = ptr_;
   ptr_ += rounded_bytes;
-  // Check that the memory is already zeroed out.
-  for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) {
-    CHECK_EQ(*ptr, 0U);
-  }
   MEMORY_TOOL_MAKE_DEFINED(ret, bytes);
-  MEMORY_TOOL_MAKE_NOACCESS(ret + bytes, rounded_bytes - bytes);
+  // Check that the memory is already zeroed out.
+  DCHECK(std::all_of(ret, ret + bytes, [](uint8_t val) { return val == 0u; }));
   return ret;
 }
 
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index ace6c38..36334c4 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -288,6 +288,11 @@
   DISALLOW_COPY_AND_ASSIGN(ArenaPool);
 };
 
+// Fast single-threaded allocator for zero-initialized memory chunks.
+//
+// Memory is allocated from ArenaPool in large chunks and then rationed through
+// the ArenaAllocator. It's returned to the ArenaPool only when the ArenaAllocator
+// is destroyed.
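+//
+// Illustrative use (a sketch, assuming the ArenaPool and Alloc(bytes, kind)
+// interface declared in this header):
+//
+//   ArenaPool pool;
+//   ArenaAllocator allocator(&pool);
+//   void* storage = allocator.Alloc(16, kArenaAllocMisc);  // Zero-initialized.
+//   // All arenas go back to the pool when `allocator` is destroyed.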
 class ArenaAllocator
     : private DebugStackRefCounter, private ArenaAllocatorStats, private ArenaAllocatorMemoryTool {
  public:
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 9c78ee5..d6a44f7 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -53,6 +53,7 @@
           : __builtin_ctzll(x);
 }
 
+// Return the number of 1-bits in `x`.
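+// For example, POPCOUNT(0x2Au) == 3, since 0x2A is 0b101010.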
 template<typename T>
 static constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t))
diff --git a/runtime/base/scoped_arena_allocator.cc b/runtime/base/scoped_arena_allocator.cc
index 31f96e4..90c6ee3 100644
--- a/runtime/base/scoped_arena_allocator.cc
+++ b/runtime/base/scoped_arena_allocator.cc
@@ -91,16 +91,19 @@
 }
 
 void* ArenaStack::AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
   size_t rounded_bytes = RoundUp(bytes + kMemoryToolRedZoneBytes, 8);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
     CHECK(ptr != nullptr) << "Failed to allocate memory";
+    MEMORY_TOOL_MAKE_NOACCESS(ptr, top_end_ - ptr);
   }
   CurrentStats()->RecordAlloc(bytes, kind);
   top_ptr_ = ptr + rounded_bytes;
   MEMORY_TOOL_MAKE_UNDEFINED(ptr, bytes);
-  MEMORY_TOOL_MAKE_NOACCESS(ptr + bytes, rounded_bytes - bytes);
   return ptr;
 }
 
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index a30c73d..a87153b 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -42,6 +42,7 @@
 static constexpr size_t kArenaAlignment = 8;
 
 // Holds a list of Arenas for use by ScopedArenaAllocator stack.
+// The memory is returned to the ArenaPool when the ArenaStack is destroyed.
 class ArenaStack : private DebugStackRefCounter, private ArenaAllocatorMemoryTool {
  public:
   explicit ArenaStack(ArenaPool* arena_pool);
@@ -121,6 +122,12 @@
   DISALLOW_COPY_AND_ASSIGN(ArenaStack);
 };
 
+// Fast single-threaded allocator. Allocated chunks are _not_ guaranteed to be zero-initialized.
+//
+// Unlike the ArenaAllocator, ScopedArenaAllocator is intended for relatively short-lived
+// objects and allows nesting multiple allocators. Only the top allocator can be used but
+// once it's destroyed, its memory can be reused by the next ScopedArenaAllocator on the
+// stack. This is facilitated by returning the memory to the ArenaStack.
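+//
+// Illustrative nesting (a sketch, assuming the ArenaStack-based constructor):
+//
+//   ArenaStack stack(&pool);
+//   ScopedArenaAllocator outer(&stack);
+//   {
+//     ScopedArenaAllocator inner(&stack);  // Now the top, usable allocator.
+//   }  // `inner` is destroyed; its memory is reusable by the next allocator.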
 class ScopedArenaAllocator
     : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats {
  public:
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 0949619..324ab21 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -149,6 +149,13 @@
   return it != container.end();
 }
 
+// const char* compare function suitable for std::map or std::set.
+struct CStringLess {
+  bool operator()(const char* lhs, const char* rhs) const {
+    return strcmp(lhs, rhs) < 0;
+  }
+};
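+
+// Example use (hypothetical): keys ordered and deduplicated by strcmp rather
+// than by pointer identity:
+//
+//   std::set<const char*, CStringLess> seen;
+//   seen.insert("foo");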
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index da70456..2dd2a83 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -104,12 +104,13 @@
   va_end(args);
 }
 
-bool ClassLinker::HasInitWithString(Thread* self, const char* descriptor) {
+static bool HasInitWithString(Thread* self, ClassLinker* class_linker, const char* descriptor)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = self->GetCurrentMethod(nullptr);
   StackHandleScope<1> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(method != nullptr ?
       method->GetDeclaringClass()->GetClassLoader() : nullptr));
-  mirror::Class* exception_class = FindClass(self, descriptor, class_loader);
+  mirror::Class* exception_class = class_linker->FindClass(self, descriptor, class_loader);
 
   if (exception_class == nullptr) {
     // No exc class ~ no <init>-with-string.
@@ -119,11 +120,40 @@
   }
 
   ArtMethod* exception_init_method = exception_class->FindDeclaredDirectMethod(
-      "<init>", "(Ljava/lang/String;)V", image_pointer_size_);
+      "<init>", "(Ljava/lang/String;)V", class_linker->GetImagePointerSize());
   return exception_init_method != nullptr;
 }
 
-void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) {
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, mirror::Class* c)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = c->GetVerifyError();
+  DCHECK(obj != nullptr);
+  self->AssertNoPendingException();
+  if (obj->IsClass()) {
+    // Previous error has been stored as class. Create a new exception of that type.
+
+    // It's possible the exception doesn't have a <init>(String).
+    std::string temp;
+    const char* descriptor = obj->AsClass()->GetDescriptor(&temp);
+
+    if (HasInitWithString(self, class_linker, descriptor)) {
+      self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
+    } else {
+      self->ThrowNewException(descriptor, nullptr);
+    }
+  } else {
+    // Previous error has been stored as an instance. Just rethrow.
+    mirror::Class* throwable_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_Throwable)->AsClass();
+    mirror::Class* error_class = obj->GetClass();
+    CHECK(throwable_class->IsAssignableFrom(error_class));
+    self->SetException(obj->AsThrowable());
+  }
+  self->AssertPendingException();
+}
+
+void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -131,8 +161,11 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyErrorClass() != nullptr) {
-      extra = PrettyDescriptor(c->GetVerifyErrorClass());
+    if (c->GetVerifyError() != nullptr) {
+      mirror::Class* descr_from = c->GetVerifyError()->IsClass()
+                                      ? c->GetVerifyError()->AsClass()
+                                      : c->GetVerifyError()->GetClass();
+      extra = PrettyDescriptor(descr_from);
     }
     LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
   }
@@ -144,20 +177,16 @@
     mirror::Throwable* pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyErrorClass() != nullptr) {
-      // TODO: change the verifier to store an _instance_, with a useful detail message?
-      // It's possible the exception doesn't have a <init>(String).
-      std::string temp;
-      const char* descriptor = c->GetVerifyErrorClass()->GetDescriptor(&temp);
-
-      if (HasInitWithString(self, descriptor)) {
-        self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
-      } else {
-        self->ThrowNewException(descriptor, nullptr);
-      }
-    } else {
-      self->ThrowNewException("Ljava/lang/NoClassDefFoundError;",
-                              PrettyDescriptor(c).c_str());
+    if (c->GetVerifyError() != nullptr) {
+      // Rethrow stored error.
+      HandleEarlierVerifyError(self, this, c);
+    }
+    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+      // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
+      // the top-level exception must be a NoClassDefFoundError. The potentially already pending
+      // exception will be a cause.
+      self->ThrowNewWrappedException("Ljava/lang/NoClassDefFoundError;",
+                                     PrettyDescriptor(c).c_str());
     }
   }
 }
@@ -274,7 +303,7 @@
 ClassLinker::ClassLinker(InternTable* intern_table)
     // dex_lock_ is recursive as it may be used in stack dumping.
     : dex_lock_("ClassLinker dex lock", kDefaultMutexLevel),
-      dex_cache_image_class_lookup_required_(false),
+      dex_cache_boot_image_class_lookup_required_(false),
       failed_dex_cache_class_lookups_(0),
       class_roots_(nullptr),
       array_iftable_(nullptr),
@@ -293,7 +322,8 @@
   std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr));
 }
 
-void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) {
+bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                                   std::string* error_msg) {
   VLOG(startup) << "ClassLinker::Init";
 
   Thread* const self = Thread::Current();
@@ -339,10 +369,12 @@
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusLoaded, self);
 
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
-  runtime->SetSentinel(heap->AllocObject<true>(self,
-                                               java_lang_Object.Get(),
-                                               java_lang_Object->GetObjectSize(),
-                                               VoidFunctor()));
+  // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
+  // cleared without triggering the read barrier and unintentionally mark the sentinel alive.
+  runtime->SetSentinel(heap->AllocNonMovableObject<true>(self,
+                                                         java_lang_Object.Get(),
+                                                         java_lang_Object->GetObjectSize(),
+                                                         VoidFunctor()));
 
   // Object[] next to hold class roots.
   Handle<mirror::Class> object_array_class(hs.NewHandle(
@@ -446,9 +478,15 @@
   // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
-  CHECK_NE(0U, boot_class_path.size());
+  if (boot_class_path.empty()) {
+    *error_msg = "Boot classpath is empty.";
+    return false;
+  }
   for (auto& dex_file : boot_class_path) {
-    CHECK(dex_file.get() != nullptr);
+    if (dex_file.get() == nullptr) {
+      *error_msg = "Null dex file.";
+      return false;
+    }
     AppendToBootClassPath(self, *dex_file);
     opened_dex_files_.push_back(std::move(dex_file));
   }
@@ -629,6 +667,8 @@
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromCompiler exiting";
+
+  return true;
 }
 
 void ClassLinker::FinishInit(Thread* self) {
@@ -765,7 +805,7 @@
       CHECK_EQ(field.GetDeclaringClass(), klass);
     }
     auto* runtime = Runtime::Current();
-    auto* image_space = runtime->GetHeap()->GetImageSpace();
+    auto* image_space = runtime->GetHeap()->GetBootImageSpace();
     auto pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
     for (auto& m : klass->GetDirectMethods(pointer_size)) {
       SanityCheckArtMethod(&m, klass, image_space);
@@ -819,17 +859,17 @@
   DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor);
 };
 
-void ClassLinker::InitFromImage() {
+bool ClassLinker::InitFromImage(std::string* error_msg) {
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
 
   Runtime* const runtime = Runtime::Current();
   Thread* const self = Thread::Current();
   gc::Heap* const heap = runtime->GetHeap();
-  gc::space::ImageSpace* const space = heap->GetImageSpace();
+  gc::space::ImageSpace* const space = heap->GetBootImageSpace();
   CHECK(space != nullptr);
   image_pointer_size_ = space->GetImageHeader().GetPointerSize();
-  dex_cache_image_class_lookup_required_ = true;
+  dex_cache_boot_image_class_lookup_required_ = true;
   const OatFile* oat_file = runtime->GetOatFileManager().RegisterImageOatFile(space);
   DCHECK(oat_file != nullptr);
   CHECK_EQ(oat_file->GetOatHeader().GetImageFileLocationOatChecksum(), 0U);
@@ -847,8 +887,8 @@
       hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
 
   Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
-          space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
-          AsObjectArray<mirror::Class>()));
+      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
+      AsObjectArray<mirror::Class>()));
   class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
 
   // Special case of setting up the String class early so that we can test arbitrary objects
@@ -857,27 +897,39 @@
 
   mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
-  Runtime::Current()->SetSentinel(Runtime::Current()->GetHeap()->AllocObject<true>(self,
-                                                          java_lang_Object,
-                                                          java_lang_Object->GetObjectSize(),
-                                                          VoidFunctor()));
+  // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
+  // cleared without triggering the read barrier and unintentionally mark the sentinel alive.
+  runtime->SetSentinel(heap->AllocNonMovableObject<true>(self,
+                                                         java_lang_Object,
+                                                         java_lang_Object->GetObjectSize(),
+                                                         VoidFunctor()));
 
-  CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(),
-           static_cast<uint32_t>(dex_caches->GetLength()));
+  if (oat_file->GetOatHeader().GetDexFileCount() !=
+      static_cast<uint32_t>(dex_caches->GetLength())) {
+    *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from "
+                 "image";
+    return false;
+  }
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     StackHandleScope<1> hs2(self);
     Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i)));
     const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
     const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(),
                                                                       nullptr);
-    CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location;
-    std::string error_msg;
-    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg);
+    if (oat_dex_file == nullptr) {
+      *error_msg = StringPrintf("Failed finding oat dex file for %s %s",
+                                oat_file->GetLocation().c_str(),
+                                dex_file_location.c_str());
+      return false;
+    }
+    std::string inner_error_msg;
+    std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg);
     if (dex_file == nullptr) {
-      LOG(FATAL) << "Failed to open dex file " << dex_file_location
-                 << " from within oat file " << oat_file->GetLocation()
-                 << " error '" << error_msg << "'";
-      UNREACHABLE();
+      *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'",
+                                dex_file_location.c_str(),
+                                oat_file->GetLocation().c_str(),
+                                inner_error_msg.c_str());
+      return false;
     }
 
     if (kSanityCheckObjects) {
@@ -887,13 +939,22 @@
                                        space);
     }
 
-    CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
+    if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) {
+      *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x",
+                                dex_file_location.c_str(),
+                                dex_file->GetLocationChecksum(),
+                                oat_dex_file->GetDexFileLocationChecksum());
+      return false;
+    }
 
     AppendToBootClassPath(*dex_file.get(), dex_cache);
     opened_dex_files_.push_back(std::move(dex_file));
   }
 
-  CHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_;
+  if (!ValidPointerSize(image_pointer_size_)) {
+    *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_);
+    return false;
+  }
 
   // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
   // bitmap walk.
@@ -901,7 +962,12 @@
     // Only the Aot compiler supports having an image with a different pointer size than the
     // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart
     // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps.
-    CHECK_EQ(image_pointer_size_, sizeof(void*));
+    if (image_pointer_size_ != sizeof(void*)) {
+      *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu",
+                                image_pointer_size_,
+                                sizeof(void*));
+      return false;
+    }
   }
 
   if (kSanityCheckObjects) {
@@ -954,6 +1020,8 @@
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
+
+  return true;
 }
 
 bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
@@ -1057,8 +1125,8 @@
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor) {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -1829,7 +1897,7 @@
 
 // Special case to get oat code without overwriting a trampoline.
 const void* ClassLinker::GetQuickOatCodeFor(ArtMethod* method) {
-  CHECK(!method->IsAbstract()) << PrettyMethod(method);
+  CHECK(method->IsInvokable()) << PrettyMethod(method);
   if (method->IsProxyMethod()) {
     return GetQuickProxyInvokeHandler();
   }
@@ -1849,7 +1917,7 @@
 }
 
 const void* ClassLinker::GetOatMethodQuickCodeFor(ArtMethod* method) {
-  if (method->IsNative() || method->IsAbstract() || method->IsProxyMethod()) {
+  if (method->IsNative() || !method->IsInvokable() || method->IsProxyMethod()) {
     return nullptr;
   }
   bool found;
@@ -1944,6 +2012,13 @@
   // Ignore virtual methods on the iterator.
 }
 
+void ClassLinker::EnsureThrowsInvocationError(ArtMethod* method) {
+  DCHECK(method != nullptr);
+  DCHECK(!method->IsInvokable());
+  method->SetEntryPointFromQuickCompiledCodePtrSize(quick_to_interpreter_bridge_trampoline_,
+                                                    image_pointer_size_);
+}
+
 void ClassLinker::LinkCode(ArtMethod* method, const OatFile::OatClass* oat_class,
                            uint32_t class_def_method_index) {
   Runtime* const runtime = Runtime::Current();
@@ -1963,8 +2038,8 @@
   // Install entry point from interpreter.
   bool enter_interpreter = NeedsInterpreter(method, method->GetEntryPointFromQuickCompiledCode());
 
-  if (method->IsAbstract()) {
-    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+  if (!method->IsInvokable()) {
+    EnsureThrowsInvocationError(method);
     return;
   }
 
@@ -2126,8 +2201,6 @@
         last_field_idx = field_idx;
       }
     }
-    klass->SetSFieldsPtr(sfields);
-    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     // Load instance fields.
     LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self,
                                                                 allocator,
@@ -2149,8 +2222,17 @@
       LOG(WARNING) << "Duplicate fields in class " << PrettyDescriptor(klass.Get())
           << " (unique static fields: " << num_sfields << "/" << it.NumStaticFields()
           << ", unique instance fields: " << num_ifields << "/" << it.NumInstanceFields() << ")";
-      // NOTE: Not shrinking the over-allocated sfields/ifields.
+      // NOTE: Not shrinking the over-allocated sfields/ifields, just setting size.
+      if (sfields != nullptr) {
+        sfields->SetSize(num_sfields);
+      }
+      if (ifields != nullptr) {
+        ifields->SetSize(num_ifields);
+      }
     }
+    // Set the field arrays.
+    klass->SetSFieldsPtr(sfields);
+    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
@@ -2281,17 +2363,22 @@
   // Clean up pass to remove null dex caches.
   // Null dex caches can occur due to class unloading and we are lazily removing null entries.
   JavaVMExt* const vm = self->GetJniEnv()->vm;
-  for (auto it = dex_caches_.begin(); it != dex_caches_.end();) {
-    mirror::Object* dex_cache_root = self->DecodeJObject(*it);
-    if (dex_cache_root == nullptr) {
-      vm->DeleteWeakGlobalRef(self, *it);
+  for (auto it = dex_caches_.begin(); it != dex_caches_.end(); ) {
+    DexCacheData data = *it;
+    if (self->IsJWeakCleared(data.weak_root)) {
+      vm->DeleteWeakGlobalRef(self, data.weak_root);
       it = dex_caches_.erase(it);
     } else {
       ++it;
     }
   }
-  dex_caches_.push_back(vm->AddWeakGlobalRef(self, dex_cache.Get()));
+  jweak dex_cache_jweak = vm->AddWeakGlobalRef(self, dex_cache.Get());
   dex_cache->SetDexFile(&dex_file);
+  DexCacheData data;
+  data.weak_root = dex_cache_jweak;
+  data.dex_file = dex_cache->GetDexFile();
+  data.resolved_types = dex_cache->GetResolvedTypes();
+  dex_caches_.push_back(data);
 }
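+// Note: DexCacheData is assumed to be a small struct declared in class_linker.h holding
+// exactly the fields populated above (weak_root, dex_file, resolved_types), so that lookups
+// can compare dex files and resolved-type arrays without decoding the weak root first.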
 
 mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file, LinearAlloc* linear_alloc) {
@@ -2338,10 +2425,16 @@
                                                   const DexFile& dex_file,
                                                   bool allow_failure) {
   // Search assuming unique-ness of dex file.
-  for (jweak weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr && dex_cache->GetDexFile() == &dex_file) {
-      return dex_cache;
+  for (const DexCacheData& data : dex_caches_) {
+    // Avoid decoding unrelated dex caches (and the read barriers that decoding entails).
+    if (data.dex_file == &dex_file) {
+      mirror::DexCache* dex_cache =
+          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      if (dex_cache != nullptr) {
+        return dex_cache;
+      } else {
+        break;
+      }
     }
   }
   if (allow_failure) {
@@ -2349,8 +2442,8 @@
   }
   std::string location(dex_file.GetLocation());
   // Failure, dump diagnostic and abort.
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+  for (const DexCacheData& data : dex_caches_) {
+    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
     if (dex_cache != nullptr) {
       LOG(ERROR) << "Registered dex file " << dex_cache->GetDexFile()->GetLocation();
     }
@@ -2362,10 +2455,13 @@
 void ClassLinker::FixupDexCaches(ArtMethod* resolution_method) {
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, dex_lock_);
-  for (jobject weak_root : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache != nullptr) {
-      dex_cache->Fixup(resolution_method, image_pointer_size_);
+  for (const DexCacheData& data : dex_caches_) {
+    if (!self->IsJWeakCleared(data.weak_root)) {
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+          self->DecodeJObject(data.weak_root));
+      if (dex_cache != nullptr) {
+        dex_cache->Fixup(resolution_method, image_pointer_size_);
+      }
     }
   }
 }
@@ -2600,11 +2696,13 @@
   if (existing != nullptr) {
     return existing;
   }
-  if (kIsDebugBuild && !klass->IsTemp() && class_loader == nullptr &&
-      dex_cache_image_class_lookup_required_) {
+  if (kIsDebugBuild &&
+      !klass->IsTemp() &&
+      class_loader == nullptr &&
+      dex_cache_boot_image_class_lookup_required_) {
     // Check a class loaded with the system class loader matches one in the image if the class
     // is in the image.
-    existing = LookupClassFromImage(descriptor);
+    existing = LookupClassFromBootImage(descriptor);
     if (existing != nullptr) {
       CHECK_EQ(klass, existing);
     }
@@ -2648,11 +2746,11 @@
       }
     }
   }
-  if (class_loader != nullptr || !dex_cache_image_class_lookup_required_) {
+  if (class_loader != nullptr || !dex_cache_boot_image_class_lookup_required_) {
     return nullptr;
   }
   // Lookup failed but need to search dex_caches_.
-  mirror::Class* result = LookupClassFromImage(descriptor);
+  mirror::Class* result = LookupClassFromBootImage(descriptor);
   if (result != nullptr) {
     result = InsertClass(descriptor, result, hash);
   } else {
@@ -2661,37 +2759,43 @@
     // classes into the class table.
     constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
     if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
-      MoveImageClassesToClassTable();
+      AddBootImageClassesToClassTable();
     }
   }
   return result;
 }
 
-static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches()
+static mirror::ObjectArray<mirror::DexCache>* GetImageDexCaches(gc::space::ImageSpace* image_space)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetImageSpace();
-  CHECK(image != nullptr);
-  mirror::Object* root = image->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+  CHECK(image_space != nullptr);
+  mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+  DCHECK(root != nullptr);
   return root->AsObjectArray<mirror::DexCache>();
 }
 
-void ClassLinker::MoveImageClassesToClassTable() {
+void ClassLinker::AddBootImageClassesToClassTable() {
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddImageClassesToClassTable(Runtime::Current()->GetHeap()->GetBootImageSpace(),
+                                /*class_loader*/nullptr);
+    dex_cache_boot_image_class_lookup_required_ = false;
+  }
+}
+
+void ClassLinker::AddImageClassesToClassTable(gc::space::ImageSpace* image_space,
+                                              mirror::ClassLoader* class_loader) {
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  if (!dex_cache_image_class_lookup_required_) {
-    return;  // All dex cache classes are already in the class table.
-  }
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
+  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(image_space);
   std::string temp;
-  ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
     for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
       mirror::Class* klass = types[j].Read();
       if (klass != nullptr) {
-        DCHECK(klass->GetClassLoader() == nullptr);
+        DCHECK_EQ(klass->GetClassLoader(), class_loader);
         const char* descriptor = klass->GetDescriptor(&temp);
         size_t hash = ComputeModifiedUtf8Hash(descriptor);
         mirror::Class* existing = class_table->Lookup(descriptor, hash);
@@ -2707,7 +2811,6 @@
       }
     }
   }
-  dex_cache_image_class_lookup_required_ = false;
 }
 
 class MoveClassTableToPreZygoteVisitor : public ClassLoaderVisitor {
@@ -2731,9 +2834,10 @@
   VisitClassLoaders(&visitor);
 }
 
-mirror::Class* ClassLinker::LookupClassFromImage(const char* descriptor) {
+mirror::Class* ClassLinker::LookupClassFromBootImage(const char* descriptor) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "Image class lookup");
-  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches();
+  mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(
+      Runtime::Current()->GetHeap()->GetBootImageSpace());
   for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     const DexFile* dex_file = dex_cache->GetDexFile();
@@ -2775,8 +2879,8 @@
 
 void ClassLinker::LookupClasses(const char* descriptor, std::vector<mirror::Class*>& result) {
   result.clear();
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -2789,6 +2893,48 @@
   VisitClassLoaders(&visitor);
 }
 
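+// Verifies the given supertype (the superclass or a default-method superinterface) on
+// behalf of klass. Returns true when the supertype verified or only soft-failed (so klass
+// can be retried at runtime); on a hard failure it converts the problem into a VerifyError
+// on klass, marks klass erroneous and returns false.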
+bool ClassLinker::AttemptSupertypeVerification(Thread* self,
+                                               Handle<mirror::Class> klass,
+                                               Handle<mirror::Class> supertype) {
+  DCHECK(self != nullptr);
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(supertype.Get() != nullptr);
+
+  StackHandleScope<1> hs(self);
+  // Acquire lock to prevent races on verifying the super class.
+  ObjectLock<mirror::Class> super_lock(self, supertype);
+
+  if (!supertype->IsVerified() && !supertype->IsErroneous()) {
+    VerifyClass(self, supertype);
+  }
+  if (supertype->IsCompileTimeVerified()) {
+    // Either we are verified or we soft failed and need to retry at runtime.
+    return true;
+  }
+  // If we got this far then we have a hard failure.
+  std::string error_msg =
+      StringPrintf("Rejecting class %s that attempts to sub-type erroneous class %s",
+                   PrettyDescriptor(klass.Get()).c_str(),
+                   PrettyDescriptor(supertype.Get()).c_str());
+  LOG(WARNING) << error_msg << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
+  Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
+  if (cause.Get() != nullptr) {
+    // Set during VerifyClass call (if at all).
+    self->ClearException();
+  }
+  // Change into a verify error.
+  ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
+  if (cause.Get() != nullptr) {
+    self->GetException()->SetCause(cause.Get());
+  }
+  ClassReference ref(klass->GetDexCache()->GetDexFile(), klass->GetDexClassDefIndex());
+  if (Runtime::Current()->IsAotCompiler()) {
+    Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
+  }
+  mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+  return false;
+}
+
 void ClassLinker::VerifyClass(Thread* self, Handle<mirror::Class> klass) {
   // TODO: assert that the monitor on the Class is held
   ObjectLock<mirror::Class> lock(self, klass);
@@ -2839,56 +2985,70 @@
 
   // Verify super class.
   StackHandleScope<2> hs(self);
-  Handle<mirror::Class> super(hs.NewHandle(klass->GetSuperClass()));
-  if (super.Get() != nullptr) {
-    // Acquire lock to prevent races on verifying the super class.
-    ObjectLock<mirror::Class> super_lock(self, super);
+  MutableHandle<mirror::Class> supertype(hs.NewHandle(klass->GetSuperClass()));
+  // If we have a superclass and we get a hard verification failure we can return immediately.
+  if (supertype.Get() != nullptr && !AttemptSupertypeVerification(self, klass, supertype)) {
+    CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
+    return;
+  }
 
-    if (!super->IsVerified() && !super->IsErroneous()) {
-      VerifyClass(self, super);
-    }
-    if (!super->IsCompileTimeVerified()) {
-      std::string error_msg(
-          StringPrintf("Rejecting class %s that attempts to sub-class erroneous class %s",
-                       PrettyDescriptor(klass.Get()).c_str(),
-                       PrettyDescriptor(super.Get()).c_str()));
-      LOG(WARNING) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-      Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
-      if (cause.Get() != nullptr) {
-        self->ClearException();
+  // Verify all default super-interfaces.
+  //
+  // (1) Don't bother if the superclass has already had a soft verification failure.
+  //
+  // (2) Interfaces shouldn't bother to do this recursive verification because they cannot cause
+  //     recursive initialization by themselves. This is because when an interface is initialized
+  //     directly it must not initialize its superinterfaces. We are allowed to verify regardless
+  //     but choose not to as an optimization. If the interface is being verified due to a class
+  //     initialization (which would need all the default interfaces to be verified) the class code
+  //     will trigger the recursive verification anyway.
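+  //
+  // A hypothetical example of this: given
+  //
+  //   interface I { default void f() { } }
+  //   interface J extends I { }
+  //
+  // verifying J alone does not recurse into I, but a class implementing J will verify I in
+  // the loop below because I declares default methods.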
+  if ((supertype.Get() == nullptr || supertype->IsVerified())  // See (1)
+      && !klass->IsInterface()) {                              // See (2)
+    int32_t iftable_count = klass->GetIfTableCount();
+    MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
+    // Loop through all interfaces this class has defined; the order does not matter.
+    for (int32_t i = 0; i < iftable_count; i++) {
+      iface.Assign(klass->GetIfTable()->GetInterface(i));
+      DCHECK(iface.Get() != nullptr);
+      // We only care about interfaces with default methods, and can skip any already verified...
+      if (LIKELY(!iface->HasDefaultMethods() || iface->IsVerified())) {
+        continue;
+      } else if (UNLIKELY(!AttemptSupertypeVerification(self, klass, iface))) {
+        // We had a hard failure while verifying this interface. Just return immediately.
+        CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
+        return;
+      } else if (UNLIKELY(!iface->IsVerified())) {
+        // We softly failed to verify the iface. Stop checking and clean up.
+        // Put the iface into the supertype handle so we know what caused us to fail.
+        supertype.Assign(iface.Get());
+        break;
       }
-      ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
-      if (cause.Get() != nullptr) {
-        self->GetException()->SetCause(cause.Get());
-      }
-      ClassReference ref(klass->GetDexCache()->GetDexFile(), klass->GetDexClassDefIndex());
-      if (Runtime::Current()->IsAotCompiler()) {
-        Runtime::Current()->GetCompilerCallbacks()->ClassRejected(ref);
-      }
-      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-      return;
     }
   }
 
+  // At this point if verification failed, then supertype is the "first" supertype that failed
+  // verification (without a specific order). If verification succeeded, then supertype is either
+  // null or the original superclass of klass and is verified.
+  DCHECK(supertype.Get() == nullptr ||
+         supertype.Get() == klass->GetSuperClass() ||
+         !supertype->IsVerified());
+
   // Try to use verification information from the oat file, otherwise do runtime verification.
   const DexFile& dex_file = *klass->GetDexCache()->GetDexFile();
   mirror::Class::Status oat_file_class_status(mirror::Class::kStatusNotReady);
   bool preverified = VerifyClassUsingOatFile(dex_file, klass.Get(), oat_file_class_status);
-  if (oat_file_class_status == mirror::Class::kStatusError) {
-    VLOG(class_linker) << "Skipping runtime verification of erroneous class "
-        << PrettyDescriptor(klass.Get()) << " in "
-        << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
-    ThrowVerifyError(klass.Get(), "Rejecting class %s because it failed compile-time verification",
-                     PrettyDescriptor(klass.Get()).c_str());
-    mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-    return;
-  }
+  // If the oat file says the class had an error, re-run the verifier. That way we will get a
+  // precise error message. To ensure a rerun, test:
+  //     oat_file_class_status == mirror::Class::kStatusError => !preverified
+  DCHECK(!(oat_file_class_status == mirror::Class::kStatusError) || !preverified);
+
   verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
   if (!preverified) {
     verifier_failure = verifier::MethodVerifier::VerifyClass(self,
                                                              klass.Get(),
                                                              Runtime::Current()->IsAotCompiler(),
+                                                             Runtime::Current()->IsAotCompiler(),
                                                              &error_msg);
   }
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
@@ -2901,14 +3061,14 @@
     // Make sure all classes referenced by catch blocks are resolved.
     ResolveClassExceptionHandlerTypes(dex_file, klass);
     if (verifier_failure == verifier::MethodVerifier::kNoFailure) {
-      // Even though there were no verifier failures we need to respect whether the super-class
-      // was verified or requiring runtime reverification.
-      if (super.Get() == nullptr || super->IsVerified()) {
+      // Even though there were no verifier failures we need to respect whether the super-class and
+      // super-default-interfaces were verified or requiring runtime reverification.
+      if (supertype.Get() == nullptr || supertype->IsVerified()) {
         mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
       } else {
-        CHECK_EQ(super->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
+        CHECK_EQ(supertype->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
         mirror::Class::SetStatus(klass, mirror::Class::kStatusRetryVerificationAtRuntime, self);
-        // Pretend a soft failure occured so that we don't consider the class verified below.
+        // Pretend a soft failure occurred so that we don't consider the class verified below.
         verifier_failure = verifier::MethodVerifier::kSoftFailure;
       }
     } else {
@@ -2926,9 +3086,9 @@
       }
     }
   } else {
-    LOG(WARNING) << "Verification failed on class " << PrettyDescriptor(klass.Get())
-        << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
-        << " because: " << error_msg;
+    VLOG(verifier) << "Verification failed on class " << PrettyDescriptor(klass.Get())
+                   << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
+                   << " because: " << error_msg;
     self->AssertNoPendingException();
     ThrowVerifyError(klass.Get(), "%s", error_msg.c_str());
     mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
@@ -3239,15 +3399,18 @@
     Thread* const self = Thread::Current();
     ReaderMutexLock mu(self, dex_lock_);
     // Locate the dex cache of the original interface/Object
-    for (jobject weak_root : dex_caches_) {
-      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-      if (dex_cache != nullptr &&
-          proxy_method->HasSameDexCacheResolvedTypes(dex_cache->GetResolvedTypes(),
+    for (const DexCacheData& data : dex_caches_) {
+      if (!self->IsJWeakCleared(data.weak_root) &&
+          proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
                                                      image_pointer_size_)) {
-        ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
-            proxy_method->GetDexMethodIndex(), image_pointer_size_);
-        CHECK(resolved_method != nullptr);
-        return resolved_method;
+        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+            self->DecodeJObject(data.weak_root));
+        if (dex_cache != nullptr) {
+          ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
+              proxy_method->GetDexMethodIndex(), image_pointer_size_);
+          CHECK(resolved_method != nullptr);
+          return resolved_method;
+        }
       }
     }
   }
@@ -3312,7 +3475,7 @@
   // Basic sanity
   CHECK(!prototype->IsFinal());
   CHECK(method->IsFinal());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   // The proxy method doesn't have its own dex cache or dex file and so it steals those of its
   // interface prototype. The exception to this are Constructors and the Class of the Proxy itself.
@@ -3399,7 +3562,7 @@
 
     // Was the class already found to be erroneous? Done under the lock to match the JLS.
     if (klass->IsErroneous()) {
-      ThrowEarlierClassFailure(klass.Get());
+      ThrowEarlierClassFailure(klass.Get(), true);
       VlogClassInitializationFailure(klass);
       return false;
     }
@@ -4043,10 +4206,10 @@
         Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
       }
       CHECK_EQ(existing, klass.Get());
-      if (kIsDebugBuild && class_loader == nullptr && dex_cache_image_class_lookup_required_) {
+      if (kIsDebugBuild && class_loader == nullptr && dex_cache_boot_image_class_lookup_required_) {
         // Check a class loaded with the system class loader matches one in the image if the class
         // is in the image.
-        mirror::Class* const image_class = LookupClassFromImage(descriptor);
+        mirror::Class* const image_class = LookupClassFromBootImage(descriptor);
         if (image_class != nullptr) {
           CHECK_EQ(klass.Get(), existing) << descriptor;
         }
@@ -4388,7 +4551,7 @@
   // A map from vtable indexes to the method they need to be updated to point to. Used because we
   // need to have default methods be in the virtuals array of each class but we don't set that up
   // until LinkInterfaceMethods.
-  std::unordered_map<size_t, ArtMethod*> default_translations;
+  std::unordered_map<size_t, ClassLinker::MethodTranslation> default_translations;
   // Link virtual methods then interface methods.
   // We set up the interface lookup table first because we need it to determine if we need to update
   // any vtable entries with new default method implementations.
@@ -4521,7 +4684,7 @@
 bool ClassLinker::LinkVirtualMethods(
     Thread* self,
     Handle<mirror::Class> klass,
-    /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations) {
+    /*out*/std::unordered_map<size_t, ClassLinker::MethodTranslation>* default_translations) {
   const size_t num_virtual_methods = klass->NumVirtualMethods();
   if (klass->IsInterface()) {
     // No vtable.
@@ -4644,46 +4807,55 @@
                        << " would have incorrectly overridden the package-private method in "
                        << PrettyDescriptor(super_method->GetDeclaringClassDescriptor());
         }
-      } else if (super_method->IsDefault()) {
+      } else if (super_method->IsOverridableByDefaultMethod()) {
         // We didn't directly override this method but we might through default methods...
         // Check for default method update.
         ArtMethod* default_method = nullptr;
-        std::string icce_message;
-        if (!FindDefaultMethodImplementation(self,
-                                             super_method,
-                                             klass,
-                                             /*out*/&default_method,
-                                             /*out*/&icce_message)) {
-          // An error occurred while finding default methods.
-          // TODO This should actually be thrown when we attempt to invoke this method.
-          ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
-          return false;
-        }
-        // This should always work because we inherit superclass interfaces. We should either get
-        //  1) An IncompatibleClassChangeError because of conflicting default method
-        //     implementations.
-        //  2) The same default method implementation as the superclass.
-        //  3) A default method that overrides the superclass's.
-        // Therefore this check should never fail.
-        CHECK(default_method != nullptr);
-        if (UNLIKELY(default_method->GetDeclaringClass() != super_method->GetDeclaringClass())) {
-          // TODO Refactor this add default methods to virtuals here and not in
-          //      LinkInterfaceMethods maybe.
-          //      The problem is default methods might override previously present default-method or
-          //      miranda-method vtable entries from the superclass. Unfortunately we need these to
-          //      be entries in this class's virtuals. We do not give these entries there until
-          //      LinkInterfaceMethods so we pass this map around to let it know which vtable
-          //      entries need to be updated.
-          // Make a note that vtable entry j must be updated, store what it needs to be updated to.
-          // We will allocate a virtual method slot in LinkInterfaceMethods and fix it up then.
-          default_translations->insert({j, default_method});
-          VLOG(class_linker) << "Method " << PrettyMethod(super_method) << " overridden by default "
-                             << PrettyMethod(default_method) << " in " << PrettyClass(klass.Get());
-        } else {
-          // They are the same method/no override
-          // Cannot do direct comparison because we had to copy the ArtMethod object into the
-          // superclass's vtable.
-          continue;
+        switch (FindDefaultMethodImplementation(self,
+                                                super_method,
+                                                klass,
+                                                /*out*/&default_method)) {
+          case DefaultMethodSearchResult::kDefaultConflict: {
+            // A conflict was found looking for default methods. Note this (assuming it wasn't
+            // pre-existing) in the translations map.
+            if (UNLIKELY(!super_method->IsDefaultConflicting())) {
+              // Don't generate another conflict method to reduce memory use as an optimization.
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateConflictingMethod()});
+            }
+            break;
+          }
+          case DefaultMethodSearchResult::kAbstractFound: {
+            // No conflict but method is abstract.
+            // We note that this vtable entry must be made abstract.
+            if (UNLIKELY(!super_method->IsAbstract())) {
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateAbstractMethod()});
+            }
+            break;
+          }
+          case DefaultMethodSearchResult::kDefaultFound: {
+            if (UNLIKELY(super_method->IsDefaultConflicting() ||
+                        default_method->GetDeclaringClass() != super_method->GetDeclaringClass())) {
+              // Found a default method implementation that is new.
+              // TODO Refactor this: add default methods to virtuals here and not in
+              //      LinkInterfaceMethods, maybe.
+              //      The problem is default methods might override previously present
+              //      default-method or miranda-method vtable entries from the superclass.
+              //      Unfortunately we need these to be entries in this class's virtuals. We do not
+              //      give these entries there until LinkInterfaceMethods so we pass this map around
+              //      to let it know which vtable entries need to be updated.
+              // Make a note that vtable entry j must be updated, store what it needs to be updated
+              // to. We will allocate a virtual method slot in LinkInterfaceMethods and fix it up
+              // then.
+              default_translations->insert(
+                  {j, ClassLinker::MethodTranslation::CreateTranslatedMethod(default_method)});
+              VLOG(class_linker) << "Method " << PrettyMethod(super_method)
+                                 << " overridden by default " << PrettyMethod(default_method)
+                                 << " in " << PrettyClass(klass.Get());
+            }
+            break;
+          }
         }
       }
     }
@@ -4736,23 +4908,75 @@
   return true;
 }
 
+// Determine if the given iface has any subinterface in the given list that declares the method
+// specified by 'target'.
+//
+// Arguments
+// - self:    The thread we are running on
+// - target:  A comparator that will match any method that overrides the method we are checking for
+// - iftable: The iftable we are searching for an overriding method on.
+// - ifstart: The index in iftable of the interface we are checking for overrides of.
+// - iface:   The interface we are checking for overrides of.
+// - image_pointer_size:
+//            The image pointer size.
+//
+// Returns
+// - True:  There is some method that matches the target comparator defined in an interface that
+//          is a subtype of iface.
+// - False: There is no method that matches the target comparator in any interface that is a subtype
+//          of iface.
+static bool ContainsOverridingMethodOf(Thread* self,
+                                       MethodNameAndSignatureComparator& target,
+                                       Handle<mirror::IfTable> iftable,
+                                       size_t ifstart,
+                                       Handle<mirror::Class> iface,
+                                       size_t image_pointer_size)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  DCHECK(self != nullptr);
+  DCHECK(iface.Get() != nullptr);
+  DCHECK(iftable.Get() != nullptr);
+  DCHECK_GE(ifstart, 0u);
+  DCHECK_LT(ifstart, iftable->Count());
+  DCHECK_EQ(iface.Get(), iftable->GetInterface(ifstart));
+  DCHECK(iface->IsInterface());
+
+  size_t iftable_count = iftable->Count();
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Class> current_iface(hs.NewHandle<mirror::Class>(nullptr));
+  for (size_t k = ifstart + 1; k < iftable_count; k++) {
+    // Skip ifstart since our current interface obviously cannot override itself.
+    current_iface.Assign(iftable->GetInterface(k));
+    size_t num_instance_methods = current_iface->NumVirtualMethods();
+    // Iterate through every method on this interface. The order does not matter so we go forwards.
+    for (size_t m = 0; m < num_instance_methods; m++) {
+      ArtMethod* current_method = current_iface->GetVirtualMethodUnchecked(m, image_pointer_size);
+      if (UNLIKELY(target.HasSameNameAndSignature(
+                      current_method->GetInterfaceMethodIfProxy(image_pointer_size)))) {
+        // Check if the k'th interface is a subtype of iface (the one we are checking).
+        if (iface->IsAssignableFrom(current_iface.Get())) {
+          return true;
+        }
+        break;
+      }
+    }
+  }
+  return false;
+}
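+
+// Hypothetical example for the helper above: given
+//
+//   interface A { default void f() { } }
+//   interface B extends A { void f(); }   // re-abstracts f
+//
+// a class implementing B lists B after A in its iftable; B declares a matching f and is a
+// subtype of A, so ContainsOverridingMethodOf(..., ifstart = index of A, iface = A, ...)
+// returns true and A.f is treated as overridden.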
+
 // Find the default method implementation for 'interface_method' in 'klass'. Stores it into
-// out_default_method and returns true on success. If no default method was found stores nullptr
-// into out_default_method and returns true. If an error occurs (such as a default_method conflict)
-// it will fill the icce_message with an appropriate message for an IncompatibleClassChangeError,
-// which should then be thrown by the caller.
-bool ClassLinker::FindDefaultMethodImplementation(Thread* self,
-                                                  ArtMethod* target_method,
-                                                  Handle<mirror::Class> klass,
-                                                  /*out*/ArtMethod** out_default_method,
-                                                  /*out*/std::string* icce_message) const {
+// out_default_method and returns kDefaultFound on success. If no default method was found return
+// kAbstractFound and store nullptr into out_default_method. If an error occurs (such as a
+// default_method conflict) it will return kDefaultConflict.
+ClassLinker::DefaultMethodSearchResult ClassLinker::FindDefaultMethodImplementation(
+    Thread* self,
+    ArtMethod* target_method,
+    Handle<mirror::Class> klass,
+    /*out*/ArtMethod** out_default_method) const {
   DCHECK(self != nullptr);
   DCHECK(target_method != nullptr);
   DCHECK(out_default_method != nullptr);
-  DCHECK(icce_message != nullptr);
 
   *out_default_method = nullptr;
-  mirror::Class* chosen_iface = nullptr;
 
   // We organize the interface table so that, for interface I any subinterfaces J follow it in the
  // table. This lets us walk the table backwards when searching for default methods. The first one
@@ -4763,19 +4987,23 @@
   // The order of unrelated interfaces does not matter and is not defined.
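+  // For example (hypothetical): if J extends I and a class implements J, I precedes J in
+  // the iftable, so the reverse walk below sees J's methods before I's and finds the most
+  // specific default first.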
   size_t iftable_count = klass->GetIfTableCount();
   if (iftable_count == 0) {
-    // No interfaces. We have already reset out to null so just return true.
-    return true;
+    // No interfaces. We have already reset out to null so just return kAbstractFound.
+    return DefaultMethodSearchResult::kAbstractFound;
   }
 
-  StackHandleScope<1> hs(self);
+  StackHandleScope<3> hs(self);
+  MutableHandle<mirror::Class> chosen_iface(hs.NewHandle<mirror::Class>(nullptr));
   MutableHandle<mirror::IfTable> iftable(hs.NewHandle(klass->GetIfTable()));
+  MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
   MethodNameAndSignatureComparator target_name_comparator(
       target_method->GetInterfaceMethodIfProxy(image_pointer_size_));
   // Iterates over the klass's iftable in reverse
-  // We have a break at the end because size_t is unsigned.
-  for (size_t k = iftable_count - 1; /* break if k == 0 at end */; --k) {
+  for (size_t k = iftable_count; k != 0; ) {
+    --k;
+
     DCHECK_LT(k, iftable->Count());
-    mirror::Class* iface = iftable->GetInterface(k);
+
+    iface.Assign(iftable->GetInterface(k));
     size_t num_instance_methods = iface->NumVirtualMethods();
     // Iterate through every method on this interface. The order does not matter so we go forwards.
     for (size_t m = 0; m < num_instance_methods; m++) {
@@ -4788,31 +5016,60 @@
       }
       // The verifier should have caught the non-public method.
       DCHECK(current_method->IsPublic()) << "Interface method is not public!";
-      if (UNLIKELY(chosen_iface != nullptr)) {
-        // We have multiple default impls of the same method. We need to check they do not
-        // conflict and throw an error if they do. Conflicting means that the current iface is not
-        // masked by the chosen interface.
-        if (!iface->IsAssignableFrom(chosen_iface)) {
-          *icce_message = StringPrintf("Conflicting default method implementations: '%s' and '%s'",
-                                       PrettyMethod(current_method).c_str(),
-                                       PrettyMethod(*out_default_method).c_str());
-          return false;
+      if (UNLIKELY(chosen_iface.Get() != nullptr)) {
+        // We have multiple default impls of the same method. This is a potential default conflict.
+        // We need to check if this possibly conflicting method is either a superclass of the chosen
+        // default implementation or is overridden by a non-default interface method. In either case
+        // there is no conflict.
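+        // Hypothetical conflicting case (reachable through separate compilation): unrelated
+        // interfaces A and B each declare default void f(); a class implementing both has no
+        // most-specific f, so the search reports kDefaultConflict and the eventual call
+        // throws IncompatibleClassChangeError.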
+        if (!iface->IsAssignableFrom(chosen_iface.Get()) &&
+            !ContainsOverridingMethodOf(self,
+                                        target_name_comparator,
+                                        iftable,
+                                        k,
+                                        iface,
+                                        image_pointer_size_)) {
+          LOG(WARNING) << "Conflicting default method implementations found: "
+                       << PrettyMethod(current_method) << " and "
+                       << PrettyMethod(*out_default_method) << " in class "
+                       << PrettyClass(klass.Get()) << ".";
+          *out_default_method = nullptr;
+          return DefaultMethodSearchResult::kDefaultConflict;
         } else {
           break;  // Continue checking at the next interface.
         }
       } else {
-        *out_default_method = current_method;
-        chosen_iface = iface;
-        // We should now finish traversing the graph to find if we have default methods that
-        // conflict.
-        break;
+        // chosen_iface == null
+        if (!ContainsOverridingMethodOf(self,
+                                        target_name_comparator,
+                                        iftable,
+                                        k,
+                                        iface,
+                                        image_pointer_size_)) {
+          // Don't set this as the chosen interface if something else is overriding it (because that
+          // other interface would be potentially chosen instead if it was default). If the other
+          // interface was abstract then we wouldn't select this interface as chosen anyway since
+          // the abstract method masks it.
+          *out_default_method = current_method;
+          chosen_iface.Assign(iface.Get());
+          // We should now finish traversing the graph to find if we have default methods that
+          // conflict.
+        } else {
+          VLOG(class_linker) << "A default method '" << PrettyMethod(current_method) << "' was "
+                             << "skipped because it was overridden by an abstract method in a "
+                             << "subinterface on class '" << PrettyClass(klass.Get()) << "'";
+        }
       }
-    }
-    if (k == 0) {
       break;
     }
   }
-  return true;
+  if (*out_default_method != nullptr) {
+    VLOG(class_linker) << "Default method '" << PrettyMethod(*out_default_method) << "' selected "
+                       << "as the implementation for '" << PrettyMethod(target_method) << "' "
+                       << "in '" << PrettyClass(klass.Get()) << "'";
+    return DefaultMethodSearchResult::kDefaultFound;
+  } else {
+    return DefaultMethodSearchResult::kAbstractFound;
+  }
 }
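+
+// Hypothetical hierarchies mapping to the three results above:
+//   kDefaultFound:    interface A { default void f() { } }; class C implements A  -> uses A.f
+//   kAbstractFound:   interface A { void f(); }; abstract class C implements A    -> miranda
+//   kDefaultConflict: unrelated defaults A.f and B.f; class C implements A, B     -> ICCE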
 
 // Sets imt_ref appropriately for LinkInterfaceMethods.
@@ -4820,7 +5077,7 @@
 // Otherwise it will set the conflict method which will figure out which method to use during
 // runtime.
 static void SetIMTRef(ArtMethod* unimplemented_method,
-                      ArtMethod* conflict_method,
+                      ArtMethod* imt_conflict_method,
                       size_t image_pointer_size,
                       ArtMethod* current_method,
                       /*out*/ArtMethod** imt_ref)
@@ -4828,7 +5085,7 @@
   // Place method in imt if entry is empty, place conflict otherwise.
   if (*imt_ref == unimplemented_method) {
     *imt_ref = current_method;
-  } else if (*imt_ref != conflict_method) {
+  } else if (*imt_ref != imt_conflict_method) {
     // If we are not a conflict and we have the same signature and name as the imt
     // entry, it must be that we overwrote a superclass vtable entry.
     MethodNameAndSignatureComparator imt_comparator(
@@ -4837,7 +5094,7 @@
           current_method->GetInterfaceMethodIfProxy(image_pointer_size))) {
       *imt_ref = current_method;
     } else {
-      *imt_ref = conflict_method;
+      *imt_ref = imt_conflict_method;
     }
   }
 }
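+
+// Hypothetical IMT walk-through for the helper above: a slot starts out as the
+// unimplemented method; the first interface method hashed to the slot installs itself; a
+// second, differently-named method hashed to the same slot demotes the entry to the
+// conflict method, which figures out the actual target at runtime.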
@@ -5044,10 +5301,23 @@
   return true;
 }
 
+// Finds the method with a name/signature that matches cmp in the given list of methods. The list of
+// methods must be unique.
+static ArtMethod* FindSameNameAndSignature(MethodNameAndSignatureComparator& cmp,
+                                           const ScopedArenaVector<ArtMethod*>& list)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  for (ArtMethod* method : list) {
+    if (cmp.HasSameNameAndSignature(method)) {
+      return method;
+    }
+  }
+  return nullptr;
+}
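+
+// Used below to deduplicate the per-class miranda, default and default-conflict method
+// lists: each distinct name/signature gets at most one copied ArtMethod.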
+
 bool ClassLinker::LinkInterfaceMethods(
     Thread* self,
     Handle<mirror::Class> klass,
-    const std::unordered_map<size_t, ArtMethod*>& default_translations,
+    const std::unordered_map<size_t, ClassLinker::MethodTranslation>& default_translations,
     ArtMethod** out_imt) {
   StackHandleScope<3> hs(self);
   Runtime* const runtime = Runtime::Current();
@@ -5070,12 +5340,14 @@
   // Use the linear alloc pool since this one is in the low 4gb for the compiler.
   ArenaStack stack(runtime->GetLinearAlloc()->GetArenaPool());
   ScopedArenaAllocator allocator(&stack);
+
+  ScopedArenaVector<ArtMethod*> default_conflict_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> miranda_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> default_methods(allocator.Adapter());
 
   MutableHandle<mirror::PointerArray> vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
-  ArtMethod* const conflict_method = runtime->GetImtConflictMethod();
+  ArtMethod* const imt_conflict_method = runtime->GetImtConflictMethod();
   // Copy the IMT from the super class if possible.
   bool extend_super_iftable = false;
   if (has_superclass) {
@@ -5111,8 +5383,8 @@
           auto** imt_ref = &out_imt[imt_index];
           if (*imt_ref == unimplemented_method) {
             *imt_ref = method;
-          } else if (*imt_ref != conflict_method) {
-            *imt_ref = conflict_method;
+          } else if (*imt_ref != imt_conflict_method) {
+            *imt_ref = imt_conflict_method;
           }
         }
       }
@@ -5147,7 +5419,16 @@
 
   auto* old_cause = self->StartAssertNoThreadSuspension(
       "Copying ArtMethods for LinkInterfaceMethods");
-  for (size_t i = 0; i < ifcount; ++i) {
+  // Going in reverse to ensure that we will hit abstract methods that override defaults before the
+  // defaults. This means we don't need to do any trickery when creating the Miranda methods, since
+  // they will already be null. This has the additional benefit that the declarer of a miranda
+  // method will actually declare an abstract method.
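+  // Hypothetical example: if interface B extends A and re-abstracts A's default f(), the
+  // reverse walk processes B before A, so the miranda method for f is created from B's
+  // genuinely abstract declaration and A's masked default is never copied.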
+  for (size_t i = ifcount; i != 0; ) {
+    --i;
+
+    DCHECK_GE(i, 0u);
+    DCHECK_LT(i, ifcount);
+
     size_t num_methods = iftable->GetInterface(i)->NumVirtualMethods();
     if (num_methods > 0) {
       StackHandleScope<2> hs2(self);
@@ -5158,6 +5439,11 @@
       LengthPrefixedArray<ArtMethod>* input_virtual_methods = nullptr;
       Handle<mirror::PointerArray> input_vtable_array = NullHandle<mirror::PointerArray>();
       int32_t input_array_length = 0;
+      // TODO Cleanup Needed: In the presence of default methods this optimization is rather dirty
+      //      and confusing. Default methods should always look through all the superclasses
+      //      because they are the last choice of an implementation. We get around this by looking
+      //      at the super-classes iftable methods (copied into method_array previously) when we are
+      //      looking for the implementation of a super-interface method but that is rather dirty.
       if (super_interface) {
        // We are overwriting a super class interface, so try to search only the virtual methods
        // instead of the whole vtable.
@@ -5187,8 +5473,7 @@
         //
         // To find defaults we need to do the same but also go over interfaces.
         bool found_impl = false;
-        ArtMethod* default_impl = nullptr;
-        bool found_default_impl = false;
+        ArtMethod* vtable_impl = nullptr;
         for (int32_t k = input_array_length - 1; k >= 0; --k) {
           ArtMethod* vtable_method = input_virtual_methods != nullptr ?
               &input_virtual_methods->At(k, method_size, method_alignment) :
@@ -5205,77 +5490,138 @@
                   "Method '%s' implementing interface method '%s' is not public",
                   PrettyMethod(vtable_method).c_str(), PrettyMethod(interface_method).c_str());
               return false;
-            } else if (vtable_method->IsDefault()) {
+            } else if (UNLIKELY(vtable_method->IsOverridableByDefaultMethod())) {
               // We might have a newer, better, default method for this, so we just skip it. If we
               // are still using this we will select it again when scanning for default methods. To
               // obviate the need to copy the method again we will make a note that we already found
               // a default here.
               // TODO This should be much cleaner.
-              found_default_impl = true;
-              default_impl = vtable_method;
+              vtable_impl = vtable_method;
               break;
             } else {
               found_impl = true;
-            }
-            method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
-            // Place method in imt if entry is empty, place conflict otherwise.
-            SetIMTRef(unimplemented_method,
-                      conflict_method,
-                      image_pointer_size_,
-                      vtable_method,
-                      /*out*/imt_ptr);
-            break;
-          }
-        }
-        // We should only search for default implementations when the class does not implement the
-        // method directly and either (1) the interface is newly implemented on this class and not
-        // on any of its superclasses, (2) the superclass's implementation is a default method, or
-        // (3) the superclass does not have an implementation.
-        if (!found_impl && (!super_interface ||
-                            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_)
-                                ->IsOverridableByDefaultMethod())) {
-          ArtMethod* current_method = nullptr;
-          std::string icce_message;
-          if (!FindDefaultMethodImplementation(self,
-                                               interface_method,
-                                               klass,
-                                               /*out*/&current_method,
-                                               /*out*/&icce_message)) {
-            // There was a conflict with default method implementations.
-            self->EndAssertNoThreadSuspension(old_cause);
-            // TODO This should actually be thrown when we attempt to invoke this method.
-            ThrowIncompatibleClassChangeError(klass.Get(), "%s", icce_message.c_str());
-            return false;
-          } else if (current_method != nullptr) {
-            if (found_default_impl &&
-                current_method->GetDeclaringClass() == default_impl->GetDeclaringClass()) {
-              // We found a default method but it was the same one we already have from our
-              // superclass. Don't bother adding it to our vtable again.
-              current_method = default_impl;
-            } else {
-              // We found a default method implementation and there were no conflicts.
-              // Save the default method. We need to add it to the vtable.
-              default_methods.push_back(current_method);
-            }
-            method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
-            SetIMTRef(unimplemented_method,
-                      conflict_method,
-                      image_pointer_size_,
-                      current_method,
-                      /*out*/imt_ptr);
-            found_impl = true;
-          }
-        }
-        if (!found_impl && !super_interface) {
-          // It is defined in this class or any of its subclasses.
-          ArtMethod* miranda_method = nullptr;
-          for (auto& mir_method : miranda_methods) {
-            if (interface_name_comparator.HasSameNameAndSignature(mir_method)) {
-              miranda_method = mir_method;
+              method_array->SetElementPtrSize(j, vtable_method, image_pointer_size_);
+              // Place method in imt if entry is empty, place conflict otherwise.
+              SetIMTRef(unimplemented_method,
+                        imt_conflict_method,
+                        image_pointer_size_,
+                        vtable_method,
+                        /*out*/imt_ptr);
               break;
             }
           }
+        }
+        // Continue on to the next method if we are done.
+        if (LIKELY(found_impl)) {
+          continue;
+        } else if (LIKELY(super_interface)) {
+          // Don't look for a default implementation when the super-method is implemented directly
+          // by the class.
+          //
+          // See if we can use the superclasses method and skip searching everything else.
+          // Note: !found_impl && super_interface
+          CHECK(extend_super_iftable);
+          // If this is a super_interface method it is possible we shouldn't override it because a
+          // superclass could have implemented it directly.  We get the method the superclass used
+          // to implement this to know if we can override it with a default method. Doing this is
+          // safe since we know that the super_iftable is filled in so we can simply pull it from
+          // there. We don't bother if this is not a superclass's interface since in that case we
+          // have scanned the entire vtable anyway and would have found it.
+          // TODO This is rather dirty but it is faster than searching through the entire vtable
+          //      every time.
+          ArtMethod* supers_method =
+              method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+          DCHECK(supers_method != nullptr);
+          DCHECK(interface_name_comparator.HasSameNameAndSignature(supers_method));
+          if (!supers_method->IsOverridableByDefaultMethod()) {
+            // The method is not overridable by a default method (i.e. it is directly implemented
+            // in some class). Therefore move onto the next interface method.
+            continue;
+          }
+        }
+        // If we haven't found it yet we should search through the interfaces for default methods.
+        ArtMethod* current_method = nullptr;
+        switch (FindDefaultMethodImplementation(self,
+                                                interface_method,
+                                                klass,
+                                                /*out*/&current_method)) {
+          case DefaultMethodSearchResult::kDefaultConflict: {
+            // Default method conflict.
+            DCHECK(current_method == nullptr);
+            ArtMethod* default_conflict_method = nullptr;
+            if (vtable_impl != nullptr && vtable_impl->IsDefaultConflicting()) {
+              // We can reuse the method from the superclass, don't bother adding it to virtuals.
+              default_conflict_method = vtable_impl;
+            } else {
+              // See if we already have a conflict method for this method.
+              ArtMethod* preexisting_conflict = FindSameNameAndSignature(interface_name_comparator,
+                                                                          default_conflict_methods);
+              if (LIKELY(preexisting_conflict != nullptr)) {
+                // We already have another conflict we can reuse.
+                default_conflict_method = preexisting_conflict;
+              } else {
+                // Create a new conflict method for this to use.
+                default_conflict_method =
+                    reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
+                new(default_conflict_method) ArtMethod(interface_method, image_pointer_size_);
+                default_conflict_methods.push_back(default_conflict_method);
+              }
+            }
+            current_method = default_conflict_method;
+            break;
+          }
+          case DefaultMethodSearchResult::kDefaultFound: {
+            DCHECK(current_method != nullptr);
+            // Found a default method.
+            if (vtable_impl != nullptr &&
+                current_method->GetDeclaringClass() == vtable_impl->GetDeclaringClass()) {
+              // We found a default method but it was the same one we already have from our
+              // superclass. Don't bother adding it to our vtable again.
+              current_method = vtable_impl;
+            } else {
+              // Only record this default method if it is new to save space.
+              ArtMethod* old = FindSameNameAndSignature(interface_name_comparator, default_methods);
+              if (old == nullptr) {
+                // We found a default method implementation and there were no conflicts.
+                // Save the default method. We need to add it to the vtable.
+                default_methods.push_back(current_method);
+              } else {
+                CHECK(old == current_method) << "Multiple default implementations selected!";
+              }
+            }
+            break;
+          }
+          case DefaultMethodSearchResult::kAbstractFound: {
+            DCHECK(current_method == nullptr);
+            // Abstract method masks all defaults.
+            if (vtable_impl != nullptr &&
+                vtable_impl->IsAbstract() &&
+                !vtable_impl->IsDefaultConflicting()) {
+              // We need to make this an abstract method, but the version in the vtable already
+              // is, so don't do anything.
+              current_method = vtable_impl;
+            }
+            break;
+          }
+        }
+        if (current_method != nullptr) {
+          // We found a default method implementation. Record it in the iftable and IMT.
+          method_array->SetElementPtrSize(j, current_method, image_pointer_size_);
+          SetIMTRef(unimplemented_method,
+                    imt_conflict_method,
+                    image_pointer_size_,
+                    current_method,
+                    /*out*/imt_ptr);
+        } else if (!super_interface) {
+          // We could not find an implementation for this method and since it is a brand new
+          // interface we searched the entire vtable (and all default methods) for an implementation
+          // but couldn't find one. We therefore need to make a miranda method.
+          //
+          // Find out if there is already a miranda method we can use.
+          ArtMethod* miranda_method = FindSameNameAndSignature(interface_name_comparator,
+                                                               miranda_methods);
           if (miranda_method == nullptr) {
+            DCHECK(interface_method->IsAbstract()) << PrettyMethod(interface_method);
             miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
             CHECK(miranda_method != nullptr);
             // Point the interface table at a phantom slot.
@@ -5287,10 +5633,15 @@
       }
     }
   }
-  if (!miranda_methods.empty() || !default_methods.empty()) {
+  if (!miranda_methods.empty() || !default_methods.empty() || !default_conflict_methods.empty()) {
+    VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
+                       << " default_methods=" << default_methods.size()
+                       << " default_conflict_methods=" << default_conflict_methods.size();
     const size_t old_method_count = klass->NumVirtualMethods();
-    const size_t new_method_count =
-        old_method_count + miranda_methods.size() + default_methods.size();
+    const size_t new_method_count = old_method_count +
+                                    miranda_methods.size() +
+                                    default_methods.size() +
+                                    default_conflict_methods.size();
     // Attempt to realloc to save RAM if possible.
     LengthPrefixedArray<ArtMethod>* old_virtuals = klass->GetVirtualMethodsPtr();
     // The Realloced virtual methods aren't visible from the class roots, so there is no issue
@@ -5348,15 +5699,32 @@
     for (ArtMethod* def_method : default_methods) {
       ArtMethod& new_method = *out;
       new_method.CopyFrom(def_method, image_pointer_size_);
-      new_method.SetAccessFlags(new_method.GetAccessFlags() | kAccDefault);
       // Clear the preverified flag if it is present. Since this class hasn't been verified yet it
       // shouldn't have methods that are preverified.
       // TODO This is rather arbitrary. We should maybe support classes where only some of its
       // methods are preverified.
-      new_method.SetAccessFlags(new_method.GetAccessFlags() & ~kAccPreverified);
+      new_method.SetAccessFlags((new_method.GetAccessFlags() | kAccDefault) & ~kAccPreverified);
       move_table.emplace(def_method, &new_method);
       ++out;
     }
+    for (ArtMethod* conf_method : default_conflict_methods) {
+      ArtMethod& new_method = *out;
+      new_method.CopyFrom(conf_method, image_pointer_size_);
+      // This is a type of default method (there are default method impls, just a conflict), so
+      // mark this as a default, non-abstract method, since that's what it is. Also clear the
+      // preverified bit, since this class hasn't been verified yet and shouldn't have methods
+      // that are preverified.
+      constexpr uint32_t kSetFlags = kAccDefault | kAccDefaultConflict;
+      constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccPreverified);
+      new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
+      DCHECK(new_method.IsDefaultConflicting());
+      // The actual method might or might not be marked abstract since we just copied it from a
+      // (possibly default) interface method. We need to set its entry point to be the bridge so
+      // that the compiler will not invoke the implementation of whatever method we copied from.
+      EnsureThrowsInvocationError(&new_method);
+      move_table.emplace(conf_method, &new_method);
+      ++out;
+    }
     virtuals->SetSize(new_method_count);
     UpdateClassVirtualMethods(klass.Get(), virtuals);
     // Done copying methods, they are all roots in the class now, so we can end the no thread
@@ -5364,8 +5732,10 @@
     self->EndAssertNoThreadSuspension(old_cause);
 
     const size_t old_vtable_count = vtable->GetLength();
-    const size_t new_vtable_count =
-        old_vtable_count + miranda_methods.size() + default_methods.size();
+    const size_t new_vtable_count = old_vtable_count +
+                                    miranda_methods.size() +
+                                    default_methods.size() +
+                                    default_conflict_methods.size();
     miranda_methods.clear();
     vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
     if (UNLIKELY(vtable.Get() == nullptr)) {
@@ -5390,9 +5760,27 @@
       auto translation_it = default_translations.find(i);
       bool found_translation = false;
       if (translation_it != default_translations.end()) {
-        size_t vtable_index;
-        std::tie(vtable_index, translated_method) = *translation_it;
-        DCHECK_EQ(vtable_index, i);
+        if (translation_it->second.IsInConflict()) {
+          // Find which conflict method we are to use for this method.
+          MethodNameAndSignatureComparator old_method_comparator(
+              translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+          ArtMethod* new_conflict_method = FindSameNameAndSignature(old_method_comparator,
+                                                                    default_conflict_methods);
+          CHECK(new_conflict_method != nullptr) << "Expected a conflict method!";
+          translated_method = new_conflict_method;
+        } else if (translation_it->second.IsAbstract()) {
+          // Find which miranda method we are to use for this method.
+          MethodNameAndSignatureComparator old_method_comparator(
+              translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+          ArtMethod* miranda_method = FindSameNameAndSignature(old_method_comparator,
+                                                               miranda_methods);
+          DCHECK(miranda_method != nullptr);
+          translated_method = miranda_method;
+        } else {
+          // Normal default method (changed from an older default or abstract interface method).
+          DCHECK(translation_it->second.IsTranslation());
+          translated_method = translation_it->second.GetTranslation();
+        }
         found_translation = true;
       }
       DCHECK(translated_method != nullptr);
@@ -6099,8 +6487,8 @@
 
 void ClassLinker::DumpForSigQuit(std::ostream& os) {
   ScopedObjectAccess soa(Thread::Current());
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   ReaderMutexLock mu(soa.Self(), *Locks::classlinker_classes_lock_);
   os << "Zygote loaded classes=" << NumZygoteClasses() << " post zygote classes="
@@ -6137,8 +6525,8 @@
 }
 
 size_t ClassLinker::NumLoadedClasses() {
-  if (dex_cache_image_class_lookup_required_) {
-    MoveImageClassesToClassTable();
+  if (dex_cache_boot_image_class_lookup_required_) {
+    AddBootImageClassesToClassTable();
   }
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   // Only return non zygote classes since these are the ones which apps which care about.
@@ -6241,7 +6629,9 @@
   }
 }
 
-jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) {
+jobject ClassLinker::CreatePathClassLoader(Thread* self,
+                                           std::vector<const DexFile*>& dex_files,
+                                           jobject parent_loader) {
   // SOAAlreadyRunnable is protected, and we need something to add a global reference.
   // We could move the jobject to the callers, but all call-sites do this...
   ScopedObjectAccessUnchecked soa(self);
@@ -6272,8 +6662,8 @@
   for (const DexFile* dex_file : dex_files) {
     StackHandleScope<3> hs2(self);
 
-    // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the
-    // oat file but we can leave it null.
+    // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is
+    // supposed to be the oat file but we can leave it null.
     Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc(
         self,
         kDexFileIndexStart + 1));
@@ -6319,9 +6709,10 @@
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
   DCHECK(parent_field != nullptr);
-  mirror::Object* boot_cl =
-      soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
-  parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
+  mirror::Object* parent = (parent_loader != nullptr)
+      ? soa.Decode<mirror::ClassLoader*>(parent_loader)
+      : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
+  parent_field->SetObject<false>(h_path_class_loader.Get(), parent);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
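
For illustration, a minimal sketch of how the new parent_loader argument can be used
(assumes a running runtime and a caller-populated dex_files vector; the variable names
are illustrative, not part of this change):

    Thread* self = Thread::Current();
    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
    std::vector<const DexFile*> dex_files;  // populated by the caller
    // nullptr keeps the old behavior: the boot class loader becomes the parent.
    jobject base_loader = class_linker->CreatePathClassLoader(self, dex_files, nullptr);
    // Passing an existing loader instead chains the new loader beneath it.
    jobject child_loader = class_linker->CreatePathClassLoader(self, dex_files, base_loader);
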
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 392efd2..29aac31 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -115,12 +115,15 @@
   ~ClassLinker();
 
   // Initialize class linker by bootstrapping from dex files.
-  void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path)
+  bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path,
+                        std::string* error_msg)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
   // Initialize class linker from one or more images.
-  void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
+  bool InitFromImage(std::string* error_msg)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
@@ -487,10 +490,17 @@
     return class_roots;
   }
 
-  // Move all of the image classes into the class table for faster lookups.
-  void MoveImageClassesToClassTable()
+  // Add all of the boot image classes into the class table for faster lookups.
+  void AddBootImageClassesToClassTable()
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Add image classes to the class table.
+  void AddImageClassesToClassTable(gc::space::ImageSpace* image_space,
+                                   mirror::ClassLoader* class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Move the class table to the pre-zygote table to reduce memory usage. This works by ensuring
   // that no more classes are ever added to the pre zygote table which makes it that the pages
   // always remain shared dirty instead of private dirty.
@@ -504,7 +514,10 @@
 
   // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files.
   // Note: the objects are not completely set up. Do not use this outside of tests and the compiler.
-  jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files)
+  // If parent_loader is null then we use the boot class loader.
+  jobject CreatePathClassLoader(Thread* self,
+                                std::vector<const DexFile*>& dex_files,
+                                jobject parent_loader)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -544,6 +557,17 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  struct DexCacheData {
+    // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
+    // not work properly.
+    jweak weak_root;
+    // The following two fields are caches of the DexCache's fields, kept here to avoid an
+    // unnecessary jweak decode that triggers read barriers (which would mark the caches alive
+    // unnecessarily and interfere with class unloading).
+    const DexFile* dex_file;
+    GcRoot<mirror::Class>* resolved_types;
+  };
+
  private:
   struct ClassLoaderData {
     jweak weak_root;  // Weak root to enable class unloading.
@@ -551,6 +575,15 @@
     LinearAlloc* allocator;
   };
 
+  // Ensures that 'supertype' (the supertype of 'klass') is verified. Returns false and throws
+  // appropriate exceptions if verification failed hard. Returns true for successful verification
+  // or soft failures.
+  bool AttemptSupertypeVerification(Thread* self,
+                                    Handle<mirror::Class> klass,
+                                    Handle<mirror::Class> supertype)
+      REQUIRES(!dex_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -572,7 +605,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FinishInit(Thread* self)
-  SHARED_REQUIRES(Locks::mutator_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // For early bootstrapping by Init
@@ -712,6 +745,83 @@
                    ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Does whatever is needed to make sure that the compiler will not generate a direct invoke to
+  // this method. Should only be called on non-invokable methods.
+  void EnsureThrowsInvocationError(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // A wrapper class representing the result of a method translation used for linking methods and
+  // updating superclass default methods. For each method in a class's vtable there are 4 states it
+  // could be in:
+  // 1) No translation is necessary. In this case there is no MethodTranslation object for it. This
+  //    is the standard case and is true when the method is not overridable by a default method,
+  //    the class defines a concrete implementation of the method, the default method implementation
+  //    remains the same, or an abstract method stayed abstract.
+  // 2) The method must be translated to a different default method. We note this with
+  //    CreateTranslatedMethod.
+  // 3) The method must be replaced with a conflict method. This happens when a superclass
+  //    implements an interface with a default method and this class implements an unrelated
+  //    interface that also defines that default method. We note this with CreateConflictingMethod.
+  // 4) The method must be replaced with an abstract miranda method. This happens when a superclass
+  //    implements an interface with a default method and this class implements a subinterface of
+  //    the superclass's interface which declares the default method abstract. We note this with
+  //    CreateAbstractMethod.
+  //
+  // When a method translation is unnecessary (case #1), we don't put it into the
+  // default_translations map. So an instance of MethodTranslation must be in one of states #2-#4.
+  class MethodTranslation {
+   public:
+    // This slot must become a default conflict method.
+    static MethodTranslation CreateConflictingMethod() {
+      return MethodTranslation(Type::kConflict, /*translation*/nullptr);
+    }
+
+    // This slot must become an abstract method.
+    static MethodTranslation CreateAbstractMethod() {
+      return MethodTranslation(Type::kAbstract, /*translation*/nullptr);
+    }
+
+    // Use the given method as the current value for this vtable slot during translation.
+    static MethodTranslation CreateTranslatedMethod(ArtMethod* new_method) {
+      return MethodTranslation(Type::kTranslation, new_method);
+    }
+
+    // Returns true if this is a method that must become a conflict method.
+    bool IsInConflict() const {
+      return type_ == Type::kConflict;
+    }
+
+    // Returns true if this is a method that must become an abstract method.
+    bool IsAbstract() const {
+      return type_ == Type::kAbstract;
+    }
+
+    // Returns true if this is a method that must become a different method.
+    bool IsTranslation() const {
+      return type_ == Type::kTranslation;
+    }
+
+    // Get the translated version of this method.
+    ArtMethod* GetTranslation() const {
+      DCHECK(IsTranslation());
+      DCHECK(translation_ != nullptr);
+      return translation_;
+    }
+
+   private:
+    enum class Type {
+      kTranslation,
+      kConflict,
+      kAbstract,
+    };
+
+    MethodTranslation(Type type, ArtMethod* translation)
+        : translation_(translation), type_(type) {}
+
+    ArtMethod* const translation_;
+    const Type type_;
+  };
+
   // Links the virtual methods for the given class and records any default methods that will need to
   // be updated later.
   //
@@ -728,9 +838,10 @@
   //                          scan, we therefore store the vtable indices that might need to be
   //                          updated with the method they will turn into.
   // TODO This whole default_translations thing is very dirty. There should be a better way.
-  bool LinkVirtualMethods(Thread* self,
-                          Handle<mirror::Class> klass,
-                          /*out*/std::unordered_map<size_t, ArtMethod*>* default_translations)
+  bool LinkVirtualMethods(
+        Thread* self,
+        Handle<mirror::Class> klass,
+        /*out*/std::unordered_map<size_t, MethodTranslation>* default_translations)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets up the interface lookup table (IFTable) in the correct order to allow searching for
@@ -740,6 +851,13 @@
                                  Handle<mirror::ObjectArray<mirror::Class>> interfaces)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+
+  enum class DefaultMethodSearchResult {
+    kDefaultFound,
+    kAbstractFound,
+    kDefaultConflict
+  };
+
   // Find the default method implementation for 'interface_method' in 'klass', if one exists.
   //
   // Arguments:
@@ -747,31 +865,31 @@
   // * target_method - The method we are trying to find a default implementation for.
   // * klass - The class we are searching for a definition of target_method.
   // * out_default_method - The pointer we will store the found default method to on success.
-  // * icce_message - A string we will store an appropriate IncompatibleClassChangeError message
-  //                  into in case of failure. Note we must do it this way since we do not know
-  //                  whether we can allocate the exception object, which could cause us to go to
-  //                  sleep.
   //
   // Return value:
-  // * True - There were no conflicting method implementations found in the class while searching
-  //          for target_method. The default method implementation is stored into out_default_method
-  //          if it was found.  Otherwise *out_default_method will be set to nullptr.
-  // * False - Conflicting method implementations were found when searching for target_method. The
-  //           value of *out_default_method is undefined and *icce_message is a string that should
-  //           be used to create an IncompatibleClassChangeError as soon as possible.
-  bool FindDefaultMethodImplementation(Thread* self,
-                                       ArtMethod* target_method,
-                                       Handle<mirror::Class> klass,
-                                       /*out*/ArtMethod** out_default_method,
-                                       /*out*/std::string* icce_message) const
+  // * kDefaultFound - There were no conflicting method implementations found in the class while
+  //                   searching for target_method. The default method implementation is stored into
+  //                   out_default_method.
+  // * kAbstractFound - There were no conflicting method implementations found in the class while
+  //                   searching for target_method but no default implementation was found either.
+  //                   out_default_method is set to null and the method should be considered not
+  //                   implemented.
+  // * kDefaultConflict - Conflicting method implementations were found when searching for
+  //                      target_method. The value of *out_default_method is null.
+  DefaultMethodSearchResult FindDefaultMethodImplementation(
+          Thread* self,
+          ArtMethod* target_method,
+          Handle<mirror::Class> klass,
+          /*out*/ArtMethod** out_default_method) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Sets the imt entries and fixes up the vtable for the given class by linking all the interface
   // methods. See LinkVirtualMethods for an explanation of what default_translations is.
-  bool LinkInterfaceMethods(Thread* self,
-                            Handle<mirror::Class> klass,
-                            const std::unordered_map<size_t, ArtMethod*>& default_translations,
-                            ArtMethod** out_imt)
+  bool LinkInterfaceMethods(
+          Thread* self,
+          Handle<mirror::Class> klass,
+          const std::unordered_map<size_t, MethodTranslation>& default_translations,
+          ArtMethod** out_imt)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   bool LinkStaticFields(Thread* self, Handle<mirror::Class> klass, size_t* class_size)
@@ -801,7 +919,8 @@
   size_t GetDexCacheCount() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
     return dex_caches_.size();
   }
-  const std::list<jweak>& GetDexCaches() SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
+  const std::list<DexCacheData>& GetDexCachesData()
+      SHARED_REQUIRES(Locks::mutator_lock_, dex_lock_) {
     return dex_caches_;
   }
 
@@ -815,7 +934,7 @@
   void EnsurePreverifiedMethods(Handle<mirror::Class> c)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  mirror::Class* LookupClassFromImage(const char* descriptor)
+  mirror::Class* LookupClassFromBootImage(const char* descriptor)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Returns null if not found.
@@ -848,15 +967,10 @@
 
   // Throw the class initialization failure recorded when first trying to initialize the given
   // class.
-  // Note: Currently we only store the descriptor, so we cannot throw the exact throwable, only
-  //       a recreation with a custom string.
-  void ThrowEarlierClassFailure(mirror::Class* c)
+  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  bool HasInitWithString(Thread* self, const char* descriptor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -869,9 +983,9 @@
   std::vector<std::unique_ptr<const DexFile>> opened_dex_files_;
 
   mutable ReaderWriterMutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  // JNI weak globals to allow dex caches to get unloaded. We lazily delete weak globals when we
-  // register new dex files.
-  std::list<jweak> dex_caches_ GUARDED_BY(dex_lock_);
+  // JNI weak globals and side data to allow dex caches to get unloaded. We lazily delete weak
+  // globals when we register new dex files.
+  std::list<DexCacheData> dex_caches_ GUARDED_BY(dex_lock_);
 
   // This contains the class loaders which have class tables. It is populated by
   // InsertClassTableForClassLoader.
@@ -884,8 +998,8 @@
   // New class roots, only used by CMS since the GC needs to mark these in the pause.
   std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
-  // Do we need to search dex caches to find image classes?
-  bool dex_cache_image_class_lookup_required_;
+  // Do we need to search dex caches to find boot image classes?
+  bool dex_cache_boot_image_class_lookup_required_;
   // Number of times we've searched dex caches for a class. After a certain number of misses we move
   // the classes into the class_table_ to avoid dex cache based searches.
   Atomic<uint32_t> failed_dex_cache_class_lookups_;
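
A minimal sketch of how the MethodTranslation map above is meant to flow between
LinkVirtualMethods and LinkInterfaceMethods (ClassLinker-internal; the local names
and slot bookkeeping are illustrative, not part of this change):

    std::unordered_map<size_t, MethodTranslation> default_translations;
    // Producer side: record what each affected vtable slot must become.
    default_translations.emplace(conflict_slot, MethodTranslation::CreateConflictingMethod());
    default_translations.emplace(abstract_slot, MethodTranslation::CreateAbstractMethod());
    default_translations.emplace(updated_slot, MethodTranslation::CreateTranslatedMethod(new_impl));
    // Consumer side: resolve each recorded slot to its replacement method.
    auto it = default_translations.find(slot);
    if (it != default_translations.end()) {
      if (it->second.IsInConflict()) {
        // Look up the freshly created default-conflict method by name/signature.
      } else if (it->second.IsAbstract()) {
        // Look up the matching miranda method by name/signature.
      } else {
        DCHECK(it->second.IsTranslation());
        ArtMethod* replacement = it->second.GetTranslation();
        // Install 'replacement' into the vtable slot.
      }
    }
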
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 04b8900..2c086c5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -515,7 +515,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_class_), "verifyErrorClass");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
     addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_), "virtualMethods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index b6b5141..f705a50 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -553,7 +553,8 @@
 
   Thread* self = Thread::Current();
   jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
-                                                                                     class_path);
+                                                                                     class_path,
+                                                                                     nullptr);
   self->SetClassLoaderOverride(class_loader);
   return class_loader;
 }
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index a474ae6..6da2bef 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -174,13 +174,6 @@
   DISALLOW_COPY_AND_ASSIGN(CheckJniAbortCatcher);
 };
 
-// TODO: When read barrier works with the compiler, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER() \
-  if (kUseReadBarrier) { \
-    printf("WARNING: TEST DISABLED FOR READ BARRIER\n"); \
-    return; \
-  }
-
 #define TEST_DISABLED_FOR_MIPS() \
   if (kRuntimeISA == kMips) { \
     printf("WARNING: TEST DISABLED FOR MIPS\n"); \
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index de692d1..d68b463 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -242,6 +242,15 @@
   va_end(args);
 }
 
+void ThrowIncompatibleClassChangeErrorForMethodConflict(ArtMethod* method) {
+  DCHECK(method != nullptr);
+  ThrowException("Ljava/lang/IncompatibleClassChangeError;",
+                 /*referrer*/nullptr,
+                 StringPrintf("Conflicting default method implementations %s",
+                              PrettyMethod(method).c_str()).c_str());
+}
+
+
 // IOException
 
 void ThrowIOException(const char* fmt, ...) {
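
A hedged sketch of the intended call site: the invocation-time error path for a
non-invokable method can pick between the two errors (the dispatch helper below is
illustrative, not part of this change):

    void ThrowInvocationTimeErrorSketch(ArtMethod* method)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      if (method->IsDefaultConflicting()) {
        // Conflicting default implementations: IncompatibleClassChangeError.
        ThrowIncompatibleClassChangeErrorForMethodConflict(method);
      } else {
        DCHECK(method->IsAbstract());
        ThrowAbstractMethodError(method);
      }
    }
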
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 2402e6f..2a0934f 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -120,6 +120,9 @@
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
 
+void ThrowIncompatibleClassChangeErrorForMethodConflict(ArtMethod* method)
+    SHARED_REQUIRES(Locks::mutator_lock_) COLD_ATTR;
+
 // IOException
 
 void ThrowIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)))
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 7117be9..51f57c3 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,10 +69,25 @@
   return alloc_record_count;
 }
 
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      sizeof(void*));
+  }
+}
+
 class Breakpoint : public ValueObject {
  public:
   Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
-    : method_(method),
+    : method_(GetCanonicalMethod(method)),
       dex_pc_(dex_pc),
       deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
@@ -99,6 +114,12 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed-in method should be considered the
+  // same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
   ArtMethod* method_;
@@ -306,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -1231,7 +1252,15 @@
     return error;
   }
   Thread* self = Thread::Current();
-  mirror::Object* new_object = c->AllocObject(self);
+  mirror::Object* new_object;
+  if (c->IsStringClass()) {
+    // Special case for java.lang.String.
+    gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+    mirror::SetStringCountVisitor visitor(0);
+    new_object = mirror::String::Alloc<true>(self, 0, allocator_type, visitor);
+  } else {
+    new_object = c->AllocObject(self);
+  }
   if (new_object == nullptr) {
     DCHECK(self->IsExceptionPending());
     self->ClearException();
@@ -1274,9 +1303,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -2676,26 +2705,26 @@
     case JDWP::JT_BOOLEAN:
     case JDWP::JT_BYTE:
       CHECK_EQ(width, 1U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_SHORT:
     case JDWP::JT_CHAR:
       CHECK_EQ(width, 2U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_INT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kIntVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
     case JDWP::JT_FLOAT:
       CHECK_EQ(width, 4U);
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(value), kFloatVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, static_cast<uint32_t>(value));
       }
       break;
@@ -2713,7 +2742,7 @@
         VLOG(jdwp) << tag << " object " << o << " is an invalid object";
         return JDWP::ERR_INVALID_OBJECT;
       }
-      if (!visitor.SetVRegFromDebugger(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
+      if (!visitor.SetVReg(m, vreg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(o)),
                                  kReferenceVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, reinterpret_cast<uintptr_t>(o));
       }
@@ -2721,14 +2750,14 @@
     }
     case JDWP::JT_DOUBLE: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kDoubleLoVReg, kDoubleHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
     }
     case JDWP::JT_LONG: {
       CHECK_EQ(width, 8U);
-      if (!visitor.SetVRegPairFromDebugger(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
+      if (!visitor.SetVRegPair(m, vreg, value, kLongLoVReg, kLongHiVReg)) {
         return FailSetLocalValue(visitor, vreg, tag, value);
       }
       break;
@@ -2755,7 +2784,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3206,7 +3235,7 @@
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3223,7 +3252,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3266,19 +3295,22 @@
 
   if (first_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // inlined or default, we deoptimize everything; otherwise we deoptimize only this method. We
+    // must deoptimize everything for default methods because we do not know everywhere they are
+    // used. It is possible some of the copies could be inlined or otherwise missed.
+    // TODO Deoptimizing on default methods might not be necessary in all cases.
     // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
     // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    bool need_full_deoptimization = m->IsDefault() || IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining or copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
       // We don't need to deoptimize if the method has not been compiled.
-      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-      const bool is_compiled = class_linker->GetOatMethodQuickCodeFor(m) != nullptr;
+      const bool is_compiled = m->HasAnyCompiledCode();
       if (is_compiled) {
+        ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
         // If the method may be called through its direct code pointer (without loading
         // its updated entrypoint), we need full deoptimization to not miss the breakpoint.
         if (class_linker->MayBeCalledWithDirectCodePointer(m)) {
@@ -3351,7 +3383,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
@@ -4061,13 +4093,15 @@
   if (is_constructor) {
     // If we invoked a constructor (which actually returns void), return the receiver,
     // unless we threw, in which case we return null.
-    result_tag = JDWP::JT_OBJECT;
+    DCHECK_EQ(JDWP::JT_VOID, result_tag);
     if (exceptionObjectId == 0) {
       // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
       // object registry.
       result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+      result_tag = TagFromObject(soa, pReq->receiver.Read());
     } else {
       result_value = 0;
+      result_tag = JDWP::JT_OBJECT;
     }
   }
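
Why canonicalization matters, as a small sketch (copy_in_class_a and copy_in_class_b
are assumed to be copies of the same interface default method in different classes;
they are illustrative locals, not part of this change):

    // The raw pointers differ, but both canonicalize to the declaring
    // interface's method, so a breakpoint set through either copy matches both.
    Breakpoint bp(copy_in_class_a, /*dex_pc*/ 0, DeoptimizationRequest::kNothing);
    CHECK(bp.IsInMethod(copy_in_class_b));
    CHECK(ToMethodId(copy_in_class_a) == ToMethodId(copy_in_class_b));
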
 
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 3a93aac..4163e2e 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -231,7 +231,14 @@
       return nullptr;
     }
     size_t length = sbuf.st_size;
-    map.reset(MemMap::MapFile(length, PROT_READ, MAP_PRIVATE, fd, 0, location, error_msg));
+    map.reset(MemMap::MapFile(length,
+                              PROT_READ,
+                              MAP_PRIVATE,
+                              fd,
+                              0,
+                              /*low_4gb*/false,
+                              location,
+                              error_msg));
     if (map.get() == nullptr) {
       DCHECK(!error_msg->empty());
       return nullptr;
@@ -1863,10 +1870,10 @@
         Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
             klass->GetDexFile(), index, dex_cache, class_loader, true);
-        Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
         if (enum_field == nullptr) {
           return false;
         } else {
+          Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass()));
           Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true);
           element_object = enum_field->GetObject(field_class.Get());
           set_object = true;
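
The expanded MapFile call above names each boolean argument with an inline
parameter-name comment; a self-contained illustration of the convention (generic
code, not ART's API):

    #include <cstddef>
    #include <cstdio>

    // A function whose bool parameter is easy to misread at call sites.
    void MapRegion(std::size_t length, bool low_4gb) {
      std::printf("len=%zu low_4gb=%d\n", length, static_cast<int>(low_4gb));
    }

    int main() {
      // The inline comment documents which parameter the bare literal binds to.
      MapRegion(4096, /*low_4gb*/false);
      return 0;
    }
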
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index e7877b2..1e44f50 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -722,6 +722,7 @@
 
   //
   const CodeItem* GetCodeItem(const uint32_t code_off) const {
+    DCHECK_LT(code_off, size_) << "Code item offset larger than maximum allowed offset";
     if (code_off == 0) {
       return nullptr;  // native or abstract method
     } else {
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 723ee74..2819670 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -189,8 +189,14 @@
   if (program_header_only_) {
     // first just map ELF header to get program header size information
     size_t elf_header_size = sizeof(Elf_Ehdr);
-    if (!SetMap(MemMap::MapFile(elf_header_size, prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(elf_header_size,
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                /*low_4gb*/false,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       return false;
     }
@@ -202,16 +208,28 @@
                                 sizeof(Elf_Ehdr), file_->GetPath().c_str());
       return false;
     }
-    if (!SetMap(MemMap::MapFile(program_header_size, prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(program_header_size,
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                /*low_4gb*/false,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF program headers: %s", error_msg->c_str());
       return false;
     }
   } else {
     // otherwise map entire file
-    if (!SetMap(MemMap::MapFile(file_->GetLength(), prot, flags, file_->Fd(), 0,
-                                file_->GetPath().c_str(), error_msg),
+    if (!SetMap(MemMap::MapFile(file_->GetLength(),
+                                prot,
+                                flags,
+                                file_->Fd(),
+                                0,
+                                /*low_4gb*/false,
+                                file_->GetPath().c_str(),
+                                error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF file: %s", error_msg->c_str());
       return false;
@@ -1258,9 +1276,12 @@
       std::unique_ptr<MemMap> segment(
           MemMap::MapFileAtAddress(p_vaddr,
                                    program_header->p_filesz,
-                                   prot, flags, file_->Fd(),
+                                   prot,
+                                   flags,
+                                   file_->Fd(),
                                    program_header->p_offset,
-                                   true,  // implies MAP_FIXED
+                                   /*low_4gb*/false,
+                                   /*reuse*/true,  // implies MAP_FIXED
                                    file_->GetPath().c_str(),
                                    error_msg));
       if (segment.get() == nullptr) {
@@ -1775,8 +1796,14 @@
                               file->GetPath().c_str());
     return nullptr;
   }
-  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
-                                              file->GetPath().c_str(), error_msg));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              file->Fd(),
+                                              0,
+                                              /*low_4gb*/false,
+                                              file->GetPath().c_str(),
+                                              error_msg));
   if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
@@ -1809,8 +1836,14 @@
                               file->GetPath().c_str());
     return nullptr;
   }
-  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
-                                              file->GetPath().c_str(), error_msg));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              file->Fd(),
+                                              0,
+                                              /*low_4gb*/false,
+                                              file->GetPath().c_str(),
+                                              error_msg));
   if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..dccb1da 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -598,8 +598,12 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+      // The super class does not have the method.
+      return nullptr;
+    }
+    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
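
A hedged sketch of the guard added above, isolated for clarity (mirrors the diff;
the helper name is illustrative and callers are expected to treat nullptr as
"method not found"):

    ArtMethod* FindSuperMethod(ArtMethod* referrer, ArtMethod* resolved_method)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
      // With default methods, resolved_method's index may name a slot that only
      // exists in the subclass's (longer) vtable, so bounds-check first.
      if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
        return nullptr;
      }
      return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
    }
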
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index e57569e..87e29ae 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -364,36 +364,34 @@
       (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
   ArtMethod* outer_method = *caller_sp;
   ArtMethod* caller = outer_method;
-
-  if (outer_method != nullptr) {
-    const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
-    if (current_code->IsOptimized()) {
-      if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
-        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(stack_map.IsValid());
-        if (stack_map.HasInlineInfo(encoding)) {
-          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-        }
-      } else {
-        // We're instrumenting, just use the StackVisitor which knows how to
-        // handle instrumented frames.
-        NthCallerVisitor visitor(Thread::Current(), 1, true);
-        visitor.WalkStack();
-        caller = visitor.caller;
+  if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
+    if (outer_method != nullptr) {
+      const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
+      if (current_code->IsOptimized()) {
+        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+        DCHECK(stack_map.IsValid());
+        if (stack_map.HasInlineInfo(encoding)) {
+          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+        }
       }
     }
-  }
-
-  if (kIsDebugBuild && do_caller_check) {
-    // Note that do_caller_check is optional, as this method can be called by
-    // stubs, and tests without a proper call stack.
+    if (kIsDebugBuild && do_caller_check) {
+      // Note that do_caller_check is optional, as this method can be called by
+      // stubs, and tests without a proper call stack.
+      NthCallerVisitor visitor(Thread::Current(), 1, true);
+      visitor.WalkStack();
+      CHECK_EQ(caller, visitor.caller);
+    }
+  } else {
+    // We're instrumenting, just use the StackVisitor which knows how to
+    // handle instrumented frames.
     NthCallerVisitor visitor(Thread::Current(), 1, true);
     visitor.WalkStack();
-    CHECK_EQ(caller, visitor.caller);
+    caller = visitor.caller;
   }
 
   return caller;
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 3d3f7a1..27865e3 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -31,12 +31,12 @@
 namespace mirror {
 class Array;
 class Class;
+template<class MirrorType> class CompressedReference;
 class Object;
-template<class MirrorType>
-class CompressedReference;
 }  // namespace mirror
 
 class ArtMethod;
+template<class MirrorType> class GcRoot;
 class Thread;
 
 // Pointers to functions that are called by quick compiler generated code via thread-local storage.
@@ -72,9 +72,14 @@
                            Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
 
+
 // Read barrier entrypoints.
-// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to this function directly.
-// For x86 and x86_64, compilers need a wrapper assembly function, to handle mismatch in ABI.
+//
+// Compilers for ARM, ARM64, MIPS, MIPS64 can insert a call to these
+// functions directly.  For x86 and x86-64, compilers need a wrapper
+// assembly function, to handle mismatch in ABI.
+
+// Read barrier entrypoint for heap references.
 // This is the read barrier slow path for instance and static fields and reference-type arrays.
 // TODO: Currently the read barrier does not have a fast path for compilers to directly generate.
 // Ideally the slow path should only take one parameter "ref".
@@ -82,6 +87,10 @@
                                               uint32_t offset)
     SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
 
+// Read barrier entrypoint for GC roots.
+extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root)
+    SHARED_REQUIRES(Locks::mutator_lock_) HOT_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_H_
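
A hedged sketch of the field slow path's contract as described above (ref is the
fast-path load; obj and offset let the barrier locate the field again; the wrapper
name is illustrative):

    mirror::Object* ReadInstanceFieldWithBarrier(mirror::Object* obj, uint32_t offset)
        SHARED_REQUIRES(Locks::mutator_lock_) {
      // Fast-path load of the reference field at obj + offset.
      mirror::Object* ref = obj->GetFieldObject<mirror::Object>(MemberOffset(offset));
      // Slow path re-applies the barrier; ideally it would take only "ref".
      return artReadBarrierSlow(ref, obj, offset);
    }
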
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 73d8ae7..ee7b986 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -146,7 +146,8 @@
   V(NewStringFromStringBuilder, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
-  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t)
+  V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \
+  V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
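
The V(...) list above is an X-macro: each consumer defines V, expands the list, then
undefines it. A self-contained illustration of the pattern (generic code, not the
actual ART definitions):

    #include <cstddef>

    #define MINI_ENTRYPOINT_LIST(V) \
      V(AllocObject, void*, std::size_t) \
      V(ReadBarrierForRootSlow, void*, void*)

    // Expansion 1: a struct of function pointers.
    struct MiniEntryPoints {
    #define ENTRYPOINT_MEMBER(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
      MINI_ENTRYPOINT_LIST(ENTRYPOINT_MEMBER)
    #undef ENTRYPOINT_MEMBER
    };

    // Expansion 2: a count that can never drift out of sync with the struct.
    #define COUNT_ENTRY(name, rettype, ...) + 1
    constexpr std::size_t kNumEntrypoints = 0 MINI_ENTRYPOINT_LIST(COUNT_ENTRY);
    #undef COUNT_ENTRY

    static_assert(kNumEntrypoints == 2, "struct and count stay in sync");
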
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 7361d34..7ec5fc5 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -14,14 +14,16 @@
  * limitations under the License.
  */
 
+#include <stdint.h>
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "callee_save_frame.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
+#include "gc_root-inl.h"
 #include "mirror/class-inl.h"
-
-#include <stdint.h>
+#include "mirror/object_reference.h"
 
 namespace art {
 
@@ -560,13 +562,25 @@
 // TODO: Currently the read barrier does not have a fast path. Ideally the slow path should only
 // take one parameter "ref", which is given by the fast path.
 extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED,
-                                              mirror::Object* obj, uint32_t offset) {
-  DCHECK(kUseReadBarrier);
+                                              mirror::Object* obj,
+                                              uint32_t offset) {
+  DCHECK(kEmitCompilerReadBarrier);
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset;
   mirror::HeapReference<mirror::Object>* ref_addr =
-      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
-  return ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, true>(obj, MemberOffset(offset),
-                                                                      ref_addr);
+      reinterpret_cast<mirror::HeapReference<mirror::Object>*>(raw_addr);
+  constexpr ReadBarrierOption kReadBarrierOption =
+      kUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+  mirror::Object* result =
+      ReadBarrier::Barrier<mirror::Object, kReadBarrierOption, true>(obj,
+                                                                     MemberOffset(offset),
+                                                                     ref_addr);
+  return result;
+}
+
+extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root) {
+  DCHECK(kEmitCompilerReadBarrier);
+  // TODO: Pass a GcRootSource object as second argument to GcRoot::Read?
+  return root->Read();
 }
 
 }  // namespace art
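
The slow path above selects its ReadBarrierOption template argument from a
compile-time flag; a generic, self-contained sketch of that pattern (the types are
illustrative, not ART's):

    enum class Option { kWith, kWithout };

    constexpr bool kUseFeature = true;
    constexpr Option kOption = kUseFeature ? Option::kWith : Option::kWithout;

    template <Option kOpt>
    int Load(int v) {
      // Resolved at compile time; the untaken branch is discarded.
      return (kOpt == Option::kWith) ? v + 1 : v;
    }

    int Use(int v) { return Load<kOption>(v); }
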
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 5eda6d6..c41ee45 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -645,8 +645,8 @@
   // frame.
   ScopedQuickEntrypointChecks sqec(self);
 
-  if (method->IsAbstract()) {
-    ThrowAbstractMethodError(method);
+  if (UNLIKELY(!method->IsInvokable())) {
+    method->ThrowInvocationTimeError();
     return 0;
   }
 
@@ -1019,15 +1019,21 @@
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
-      } else {
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+      } else {
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+            called->GetMethodIndex(), sizeof(void*));
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 78f56ee..8587ede 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -302,8 +302,10 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pNewStringFromStringBuilder, pReadBarrierJni,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierSlow, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow,
+                         sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierSlow)
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pReadBarrierForRootSlow)
             + sizeof(void*) == sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 };
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 1361f7b..8f7bb94 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -487,7 +487,7 @@
 
 // Mark all references to the alloc space(s).
 void ModUnionTableCardCache::UpdateAndMarkReferences(MarkObjectVisitor* visitor) {
-  auto* image_space = heap_->GetImageSpace();
+  auto* image_space = heap_->GetBootImageSpace();
   // If we don't have an image space, just pass in space_ as the immune space. Pass in the same
   // space_ instead of image_space to avoid a null check in ModUnionUpdateObjectReferencesVisitor.
   CardBitVisitor bit_visitor(visitor, space_, image_space != nullptr ? image_space : space_,
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 4a49712..bcfcb89 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -23,7 +23,7 @@
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/reference_processor.h"
 #include "gc/space/image_space.h"
-#include "gc/space/space.h"
+#include "gc/space/space-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -134,10 +134,10 @@
   WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
-        || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
+    if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
+        space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
       const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" :
           "cc zygote space bitmap";
       // TODO: try avoiding using bitmaps for image/zygote to save space.
@@ -164,7 +164,7 @@
               << reinterpret_cast<void*>(region_space_->Limit());
   }
   CheckEmptyMarkStack();
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   bytes_moved_.StoreRelaxed(0);
   objects_moved_.StoreRelaxed(0);
   if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit ||
@@ -177,7 +177,11 @@
   BindBitmaps();
   if (kVerboseMode) {
     LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_;
-    LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End();
+    LOG(INFO) << "Largest immune region: " << immune_spaces_.GetLargestImmuneRegion().Begin()
+              << "-" << immune_spaces_.GetLargestImmuneRegion().End();
+    for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
+      LOG(INFO) << "Immune space: " << *space;
+    }
     LOG(INFO) << "GC end of InitializePhase";
   }
 }
@@ -300,7 +304,7 @@
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
-    DCHECK(collector_->immune_region_.ContainsObject(obj));
+    DCHECK(collector_->immune_spaces_.ContainsObject(obj));
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
     DCHECK(cc_bitmap != nullptr)
@@ -358,13 +362,17 @@
     // scan the image objects from roots by relying on the card table,
     // but it's necessary for the RB to-space invariant to hold.
     TimingLogger::ScopedTiming split1("VisitImageRoots", GetTimings());
-    gc::space::ImageSpace* image = heap_->GetImageSpace();
-    if (image != nullptr) {
-      mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
-      mirror::Object* marked_image_root = Mark(image_root);
-      CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
-      if (ReadBarrier::kEnableToSpaceInvariantChecks) {
-        AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
+    for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
+      if (space->IsImageSpace()) {
+        gc::space::ImageSpace* image = space->AsImageSpace();
+        if (image != nullptr) {
+          mirror::ObjectArray<mirror::Object>* image_root = image->GetImageHeader().GetImageRoots();
+          mirror::Object* marked_image_root = Mark(image_root);
+          CHECK_EQ(image_root, marked_image_root) << "An image object does not move";
+          if (ReadBarrier::kEnableToSpaceInvariantChecks) {
+            AssertToSpaceInvariant(nullptr, MemberOffset(0), marked_image_root);
+          }
+        }
       }
     }
   }
@@ -379,15 +387,13 @@
   }
 
   // Immune spaces.
-  for (auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-      accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
-      ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
-      live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
-                                    reinterpret_cast<uintptr_t>(space->Limit()),
-                                    visitor);
-    }
+  for (auto& space : immune_spaces_.GetSpaces()) {
+    DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
+    accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
+    ConcurrentCopyingImmuneSpaceObjVisitor visitor(this);
+    live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                  reinterpret_cast<uintptr_t>(space->Limit()),
+                                  visitor);
   }
 
   Thread* self = Thread::Current();
@@ -1074,7 +1080,7 @@
                 !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) {
     // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We
     // will change it to black or white later in ReferenceQueue::DequeuePendingReference().
-    CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
+    DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref;
   } else {
     // We may occasionally leave a Reference black or white in the queue if its referent happens to
     // be concurrently marked after the Scan() call above has enqueued the Reference, in which case
@@ -1083,9 +1089,10 @@
     if (kUseBakerReadBarrier) {
       if (region_space_->IsInToSpace(to_ref)) {
         // If to-space, change from gray to white.
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::WhitePtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::WhitePtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
       } else {
         // If non-moving space/unevac from space, change from gray
@@ -1095,9 +1102,10 @@
         // indicate non-moving objects that have been marked
         // through. Note we'd need to change from black to white
         // later (concurrently).
-        bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                           ReadBarrier::BlackPtr());
-        CHECK(success) << "Must succeed as we won the race.";
+        bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
+            ReadBarrier::GrayPtr(),
+            ReadBarrier::BlackPtr());
+        DCHECK(success) << "Must succeed as we won the race.";
         DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr());
       }
     }
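
The two hunks above relax the success CHECKs to DCHECKs and switch the read-barrier color transition to a release CAS (`/*kCasRelease*/true`). A minimal portable sketch of the release/relaxed compare-exchange pattern, with illustrative stand-ins for ART's gray/white read-barrier states (not ART's actual API):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>

// Illustrative color values standing in for ReadBarrier::GrayPtr()/WhitePtr().
constexpr uint32_t kGray = 1;
constexpr uint32_t kWhite = 2;

int main() {
  std::atomic<uint32_t> rb_state{kGray};
  // Release on success publishes prior writes to the object before other
  // threads can observe the white state; relaxed is enough on failure since
  // we only learn that someone else won the race.
  uint32_t expected = kGray;
  bool success = rb_state.compare_exchange_strong(
      expected, kWhite, std::memory_order_release, std::memory_order_relaxed);
  assert(success);  // Single writer here, so the CAS must win.
  assert(rb_state.load(std::memory_order_relaxed) == kWhite);
  return 0;
}
```
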
@@ -1201,7 +1209,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->IsContinuousMemMapAllocSpace()) {
       space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
-      if (space == region_space_ || immune_region_.ContainsSpace(space)) {
+      if (space == region_space_ || immune_spaces_.ContainsSpace(space)) {
         continue;
       }
       TimingLogger::ScopedTiming split2(
@@ -1221,9 +1229,6 @@
  public:
   explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc)
       : collector_(cc) {}
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
   void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_)
       SHARED_REQUIRES(Locks::heap_bitmap_lock_) {
     DCHECK(obj != nullptr);
@@ -1503,8 +1508,8 @@
     }
   } else {
     // In a non-moving space.
-    if (immune_region_.ContainsObject(obj)) {
-      LOG(INFO) << "holder is in the image or the zygote space.";
+    if (immune_spaces_.ContainsObject(obj)) {
+      LOG(INFO) << "holder is in an immune image or the zygote space.";
       accounting::ContinuousSpaceBitmap* cc_bitmap =
           cc_heap_bitmap_->GetContinuousSpaceBitmap(obj);
       CHECK(cc_bitmap != nullptr)
@@ -1515,7 +1520,7 @@
         LOG(INFO) << "holder is NOT marked in the bit map.";
       }
     } else {
-      LOG(INFO) << "holder is in a non-moving (or main) space.";
+      LOG(INFO) << "holder is in a non-immune, non-moving (or main) space.";
       accounting::ContinuousSpaceBitmap* mark_bitmap =
           heap_mark_bitmap_->GetContinuousSpaceBitmap(obj);
       accounting::LargeObjectBitmap* los_bitmap =
@@ -1543,7 +1548,7 @@
 void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj,
                                                                mirror::Object* ref) {
   // In a non-moving spaces. Check that the ref is marked.
-  if (immune_region_.ContainsObject(ref)) {
+  if (immune_spaces_.ContainsObject(ref)) {
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
     CHECK(cc_bitmap != nullptr)
@@ -1928,7 +1933,7 @@
     }
   } else {
     // from_ref is in a non-moving space.
-    if (immune_region_.ContainsObject(from_ref)) {
+    if (immune_spaces_.ContainsObject(from_ref)) {
       accounting::ContinuousSpaceBitmap* cc_bitmap =
           cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref);
       DCHECK(cc_bitmap != nullptr)
@@ -1982,7 +1987,7 @@
 mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) {
   // ref is in a non-moving space (from_ref == to_ref).
   DCHECK(!region_space_->HasAddress(ref)) << ref;
-  if (immune_region_.ContainsObject(ref)) {
+  if (immune_spaces_.ContainsObject(ref)) {
     accounting::ContinuousSpaceBitmap* cc_bitmap =
         cc_heap_bitmap_->GetContinuousSpaceBitmap(ref);
     DCHECK(cc_bitmap != nullptr)
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 27726e2..5d21c59 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -19,7 +19,7 @@
 
 #include "barrier.h"
 #include "garbage_collector.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -200,7 +200,7 @@
   bool is_marking_;                       // True while marking is ongoing.
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
-  ImmuneRegion immune_region_;
+  ImmuneSpaces immune_spaces_;
   std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_;
   std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_;
   accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc
index 3e1c944..8a04c17 100644
--- a/runtime/gc/collector/immune_region.cc
+++ b/runtime/gc/collector/immune_region.cc
@@ -32,39 +32,6 @@
   SetEnd(nullptr);
 }
 
-bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) {
-  // Bind live to mark bitmap if necessary.
-  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    CHECK(space->IsContinuousMemMapAllocSpace());
-    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
-  }
-  mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin());
-  mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit());
-  if (IsEmpty()) {
-    SetBegin(space_begin);
-    SetEnd(space_limit);
-  } else {
-    if (space_limit <= begin_) {  // Space is before the immune region.
-      SetBegin(space_begin);
-    } else if (space_begin >= end_) {  // Space is after the immune region.
-      SetEnd(space_limit);
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool ImmuneRegion::ContainsSpace(const space::ContinuousSpace* space) const {
-  bool contains =
-      begin_ <= reinterpret_cast<mirror::Object*>(space->Begin()) &&
-      end_ >= reinterpret_cast<mirror::Object*>(space->Limit());
-  if (kIsDebugBuild && contains) {
-    // A bump pointer space shoult not be in the immune region.
-    DCHECK(space->GetType() != space::kSpaceTypeBumpPointerSpace);
-  }
-  return contains;
-}
 
 }  // namespace collector
 }  // namespace gc
diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h
index 3ead501..b60426d 100644
--- a/runtime/gc/collector/immune_region.h
+++ b/runtime/gc/collector/immune_region.h
@@ -39,35 +39,34 @@
 class ImmuneRegion {
  public:
   ImmuneRegion();
+
   void Reset();
-  bool AddContinuousSpace(space::ContinuousSpace* space)
-      REQUIRES(Locks::heap_bitmap_lock_);
-  bool ContainsSpace(const space::ContinuousSpace* space) const;
+
   // Returns true if an object is inside of the immune region (assumed to be marked).
-  bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE {
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
     // Note: Relies on integer underflow behavior.
     return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_;
   }
+
   void SetBegin(mirror::Object* begin) {
     begin_ = begin;
     UpdateSize();
   }
+
   void SetEnd(mirror::Object* end) {
     end_ = end;
     UpdateSize();
   }
 
-  mirror::Object* Begin() {
+  mirror::Object* Begin() const {
     return begin_;
   }
-  mirror::Object* End() {
+
+  mirror::Object* End() const {
     return end_;
   }
 
  private:
-  bool IsEmpty() const {
-    return size_ == 0;
-  }
   void UpdateSize() {
     size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_);
   }
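
`ImmuneRegion::ContainsObject` keeps the single-compare range check its comment calls out: because the subtraction is done on `uintptr_t`, any address below `begin_` wraps around to a huge unsigned value that can never be less than `size_`, so one comparison covers both bounds. A standalone sketch of the trick:

```cpp
#include <cassert>
#include <cstdint>

// Returns true iff addr is in [begin, begin + size). If addr < begin, the
// unsigned subtraction wraps to a value >= size, so one compare suffices.
bool InRange(uintptr_t addr, uintptr_t begin, uintptr_t size) {
  return addr - begin < size;
}

int main() {
  const uintptr_t begin = 0x1000;
  const uintptr_t size = 0x100;
  assert(InRange(0x1000, begin, size));   // First byte.
  assert(InRange(0x10ff, begin, size));   // Last byte.
  assert(!InRange(0x1100, begin, size));  // One past the end.
  assert(!InRange(0x0fff, begin, size));  // Below begin: wraps around.
  return 0;
}
```
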
diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc
new file mode 100644
index 0000000..8f9a9e2
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "immune_spaces.h"
+
+#include "gc/space/space-inl.h"
+#include "mirror/object.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+void ImmuneSpaces::Reset() {
+  spaces_.clear();
+  largest_immune_region_.Reset();
+}
+
+void ImmuneSpaces::CreateLargestImmuneRegion() {
+  uintptr_t best_begin = 0u;
+  uintptr_t best_end = 0u;
+  uintptr_t cur_begin = 0u;
+  uintptr_t cur_end = 0u;
+  // TODO: If the last space is an image space, we may include its oat file in the immune region.
+  // This could potentially hide heap corruption bugs if there are invalid pointers that point into
+  // the boot oat code.
+  for (space::ContinuousSpace* space : GetSpaces()) {
+    uintptr_t space_begin = reinterpret_cast<uintptr_t>(space->Begin());
+    uintptr_t space_end = reinterpret_cast<uintptr_t>(space->Limit());
+    if (space->IsImageSpace()) {
+      // For the boot image, the boot oat file is always directly after. For app images it may not
+      // be if the app image was mapped at a random address.
+      space::ImageSpace* image_space = space->AsImageSpace();
+      // Update the end to include the other non-heap sections.
+      space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize);
+      uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin());
+      uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd());
+      if (space_end == oat_begin) {
+        DCHECK_GE(oat_end, oat_begin);
+        space_end = oat_end;
+      }
+    }
+    if (cur_begin == 0u) {
+      cur_begin = space_begin;
+      cur_end = space_end;
+    } else if (cur_end == space_begin) {
+      // Extend current region.
+      cur_end = space_end;
+    } else {
+      // Reset.
+      cur_begin = 0;
+      cur_end = 0;
+    }
+    if (cur_end - cur_begin > best_end - best_begin) {
+      // Improvement, update the best range.
+      best_begin = cur_begin;
+      best_end = cur_end;
+    }
+  }
+  largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin));
+  largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end));
+}
+
+void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) {
+  DCHECK(spaces_.find(space) == spaces_.end()) << *space;
+  // Bind live to mark bitmap if necessary.
+  if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
+  }
+  spaces_.insert(space);
+  CreateLargestImmuneRegion();
+}
+
+bool ImmuneSpaces::CompareByBegin::operator()(space::ContinuousSpace* a, space::ContinuousSpace* b)
+    const {
+  return a->Begin() < b->Begin();
+}
+
+bool ImmuneSpaces::ContainsSpace(space::ContinuousSpace* space) const {
+  return spaces_.find(space) != spaces_.end();
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
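
`CreateLargestImmuneRegion` is a single pass over the begin-sorted spaces: extend the current run while each space starts exactly where the previous one ended (treating a boot image and its trailing oat file as one extent), otherwise start over, and remember the best run seen so far. A minimal standalone sketch of the same interval-coalescing idea, with one deliberate difference: on a gap this version restarts the run at the current interval, whereas the patch resets to an empty run (so a space that follows a gap only seeds a new run on the next iteration):

```cpp
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

using Interval = std::pair<uintptr_t, uintptr_t>;  // [begin, end)

// Returns the largest interval formed by coalescing adjacent inputs.
// Assumes the inputs are sorted by begin address, as ImmuneSpaces' set is.
Interval LargestContiguousRun(const std::vector<Interval>& sorted) {
  Interval best{0, 0};
  Interval cur{0, 0};
  for (const Interval& range : sorted) {
    if (cur.second == range.first) {
      cur.second = range.second;  // Adjacent: extend the current run.
    } else {
      cur = range;  // Gap: restart the run at this interval.
    }
    if (cur.second - cur.first > best.second - best.first) {
      best = cur;
    }
  }
  return best;
}

int main() {
  // 0x1000-0x3000 coalesces; 0x5000-0x6000 is separated by a gap.
  std::vector<Interval> spaces = {{0x1000, 0x2000}, {0x2000, 0x3000}, {0x5000, 0x6000}};
  Interval best = LargestContiguousRun(spaces);
  std::cout << std::hex << best.first << "-" << best.second << "\n";  // 1000-3000
  return 0;
}
```
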
diff --git a/runtime/gc/collector/immune_spaces.h b/runtime/gc/collector/immune_spaces.h
new file mode 100644
index 0000000..72cb60d
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+#define ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/space/space.h"
+#include "immune_region.h"
+
+#include <set>
+
+namespace art {
+namespace gc {
+namespace space {
+class ContinuousSpace;
+}  // namespace space
+
+namespace collector {
+
+// ImmuneSpaces is a set of spaces which are not going to have any objects become marked during the
+// GC.
+class ImmuneSpaces {
+  class CompareByBegin {
+   public:
+    bool operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) const;
+  };
+
+ public:
+  ImmuneSpaces() {}
+  void Reset();
+
+  // Add a continuous space to the immune spaces set.
+  void AddSpace(space::ContinuousSpace* space) REQUIRES(Locks::heap_bitmap_lock_);
+
+  // Returns true if an object is inside of the immune region (assumed to be marked). Only returns
+  // true for the largest immune region. The object can still be inside of an immune space.
+  ALWAYS_INLINE bool IsInImmuneRegion(const mirror::Object* obj) const {
+    return largest_immune_region_.ContainsObject(obj);
+  }
+
+  // Return true if the space is contained.
+  bool ContainsSpace(space::ContinuousSpace* space) const;
+
+  // Return the set of spaces in the immune region.
+  const std::set<space::ContinuousSpace*, CompareByBegin>& GetSpaces() {
+    return spaces_;
+  }
+
+  // Return the associated largest immune region.
+  const ImmuneRegion& GetLargestImmuneRegion() const {
+    return largest_immune_region_;
+  }
+
+  // Return true if the object is contained by any of the immune spaces.
+  ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const {
+    if (largest_immune_region_.ContainsObject(obj)) {
+      return true;
+    }
+    for (space::ContinuousSpace* space : spaces_) {
+      if (space->HasAddress(obj)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+ private:
+  // Set up the immune region to cover the largest continuous set of immune spaces. The immune
+  // region is just for the fast path lookup.
+  void CreateLargestImmuneRegion();
+
+  std::set<space::ContinuousSpace*, CompareByBegin> spaces_;
+  ImmuneRegion largest_immune_region_;
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_
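
The header above shows the two-tier membership test: a single range compare against the largest immune region on the fast path, then a linear walk over the begin-ordered `std::set` as the slow path. A sketch of the same shape using a custom comparator in the style of `CompareByBegin` (all names here are illustrative, not ART's):

```cpp
#include <cassert>
#include <cstdint>
#include <set>

struct Space {
  uintptr_t begin;
  uintptr_t end;  // One past the last byte.
  bool HasAddress(uintptr_t addr) const { return begin <= addr && addr < end; }
};

// Keeps spaces ordered by begin address, mirroring ImmuneSpaces::CompareByBegin.
struct CompareByBegin {
  bool operator()(const Space* a, const Space* b) const { return a->begin < b->begin; }
};

struct ImmuneLookup {
  uintptr_t region_begin = 0;
  uintptr_t region_size = 0;  // Largest contiguous run of immune spaces.
  std::set<const Space*, CompareByBegin> spaces;

  bool Contains(uintptr_t addr) const {
    if (addr - region_begin < region_size) {
      return true;  // Fast path: inside the largest immune region.
    }
    for (const Space* space : spaces) {  // Slow path: any immune space.
      if (space->HasAddress(addr)) {
        return true;
      }
    }
    return false;
  }
};

int main() {
  Space a{0x1000, 0x2000};
  Space b{0x8000, 0x9000};  // Not adjacent, so outside the largest region.
  ImmuneLookup lookup;
  lookup.region_begin = a.begin;
  lookup.region_size = a.end - a.begin;
  lookup.spaces = {&a, &b};
  assert(lookup.Contains(0x1800));   // Fast path.
  assert(lookup.Contains(0x8800));   // Slow path.
  assert(!lookup.Contains(0x4000));  // Neither.
  return 0;
}
```
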
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
new file mode 100644
index 0000000..f741117
--- /dev/null
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_runtime_test.h"
+#include "gc/collector/immune_spaces.h"
+#include "gc/space/image_space.h"
+#include "gc/space/space-inl.h"
+#include "oat_file.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace mirror {
+class Object;
+}  // namespace mirror
+namespace gc {
+namespace collector {
+
+class ImmuneSpacesTest : public CommonRuntimeTest {};
+
+class DummySpace : public space::ContinuousSpace {
+ public:
+  DummySpace(uint8_t* begin, uint8_t* end)
+      : ContinuousSpace("DummySpace",
+                        space::kGcRetentionPolicyNeverCollect,
+                        begin,
+                        end,
+                        /*limit*/end) {}
+
+  space::SpaceType GetType() const OVERRIDE {
+    return space::kSpaceTypeMallocSpace;
+  }
+
+  bool CanMoveObjects() const OVERRIDE {
+    return false;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
+    return nullptr;
+  }
+
+  accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
+    return nullptr;
+  }
+};
+
+TEST_F(ImmuneSpacesTest, AppendBasic) {
+  ImmuneSpaces spaces;
+  uint8_t* const base = reinterpret_cast<uint8_t*>(0x1000);
+  DummySpace a(base, base + 45 * KB);
+  DummySpace b(a.Limit(), a.Limit() + 813 * KB);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(&a);
+    spaces.AddSpace(&b);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(&a));
+  EXPECT_TRUE(spaces.ContainsSpace(&b));
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), a.Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit());
+}
+
+class DummyImageSpace : public space::ImageSpace {
+ public:
+  DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap)
+      : ImageSpace("DummyImageSpace",
+                   /*image_location*/"",
+                   map,
+                   live_bitmap,
+                   map->End()) {}
+
+  // oat_size is the size of the oat file mapped directly after the image.
+  static DummyImageSpace* Create(size_t size, size_t oat_size) {
+    std::string error_str;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ | PROT_WRITE,
+                                                     /*low_4gb*/true,
+                                                     /*reuse*/false,
+                                                     &error_str));
+    if (map == nullptr) {
+      LOG(ERROR) << error_str;
+      return nullptr;
+    }
+    std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap(
+        accounting::ContinuousSpaceBitmap::Create("bitmap", map->Begin(), map->Size()));
+    if (live_bitmap == nullptr) {
+      return nullptr;
+    }
+    // Create image header.
+    ImageSection sections[ImageHeader::kSectionCount];
+    new (map->Begin()) ImageHeader(
+        /*image_begin*/PointerToLowMemUInt32(map->Begin()),
+        /*image_size*/map->Size(),
+        sections,
+        /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1,
+        /*oat_checksum*/0u,
+        /*oat_file_begin*/PointerToLowMemUInt32(map->End()),
+        /*oat_data_begin*/PointerToLowMemUInt32(map->End()),
+        /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size),
+        /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size),
+        /*pointer_size*/sizeof(void*),
+        /*compile_pic*/false);
+    return new DummyImageSpace(map.release(), live_bitmap.release());
+  }
+};
+
+TEST_F(ImmuneSpacesTest, AppendAfterImage) {
+  ImmuneSpaces spaces;
+  constexpr size_t image_size = 123 * kPageSize;
+  constexpr size_t image_oat_size = 321 * kPageSize;
+  std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size));
+  ASSERT_TRUE(image_space != nullptr);
+  const ImageHeader& image_header = image_space->GetImageHeader();
+  EXPECT_EQ(image_header.GetImageSize(), image_size);
+  EXPECT_EQ(static_cast<size_t>(image_header.GetOatFileEnd() - image_header.GetOatFileBegin()),
+            image_oat_size);
+  DummySpace space(image_header.GetOatFileEnd(), image_header.GetOatFileEnd() + 813 * kPageSize);
+  EXPECT_NE(image_space->Limit(), space.Begin());
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    spaces.AddSpace(image_space.get());
+    spaces.AddSpace(&space);
+  }
+  EXPECT_TRUE(spaces.ContainsSpace(image_space.get()));
+  EXPECT_TRUE(spaces.ContainsSpace(&space));
+  // CreateLargestImmuneRegion should have coalesced the two spaces since the oat code after the
+  // image prevents gaps.
+  // Check that we have a continuous region.
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()),
+            image_space->Begin());
+  EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit());
+}
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index f561764..ce6467a 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -45,7 +45,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
@@ -115,7 +115,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_;
   // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -148,7 +148,7 @@
     // Verify all the objects have the correct forward pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (!immune_region_.ContainsObject(obj)) {
+  if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
       if (!objects_before_forwarding_->Set(obj)) {
         MarkStackPush(obj);  // This object was not previously marked.
@@ -218,7 +218,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   for (auto& space : heap_->GetContinuousSpaces()) {
     // If the space is immune then we need to mark the references to other spaces.
-    if (immune_region_.ContainsSpace(space)) {
+    if (immune_spaces_.ContainsSpace(space)) {
       accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
       if (table != nullptr) {
         // TODO: Improve naming.
@@ -475,7 +475,7 @@
 }
 
 mirror::Object* MarkCompact::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (updating_references_) {
@@ -498,7 +498,7 @@
 }
 
 bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const {
-  return space != space_ && !immune_region_.ContainsSpace(space);
+  return space != space_ && !immune_spaces_.ContainsSpace(space);
 }
 
 class MoveObjectVisitor {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 8d91939..8a12094 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "lock_word.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -194,8 +194,8 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // Bump pointer space which we are collecting.
   space::BumpPointerSpace* space_;
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index db516a0..5427f88 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -86,7 +86,7 @@
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
@@ -115,7 +115,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   no_reference_class_count_.StoreRelaxed(0);
   normal_count_.StoreRelaxed(0);
   class_count_.StoreRelaxed(0);
@@ -268,16 +268,41 @@
   PreCleanCards();
 }
 
+class ScanObjectVisitor {
+ public:
+  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
+      : mark_sweep_(mark_sweep) {}
+
+  void operator()(mirror::Object* obj) const
+      ALWAYS_INLINE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    if (kCheckLocks) {
+      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
+    }
+    mark_sweep_->ScanObject(obj);
+  }
+
+ private:
+  MarkSweep* const mark_sweep_;
+};
+
 void MarkSweep::UpdateAndMarkModUnion() {
-  for (const auto& space : heap_->GetContinuousSpaces()) {
-    if (immune_region_.ContainsSpace(space)) {
-      const char* name = space->IsZygoteSpace()
-          ? "UpdateAndMarkZygoteModUnionTable"
-          : "UpdateAndMarkImageModUnionTable";
-      TimingLogger::ScopedTiming t(name, GetTimings());
-      accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
-      CHECK(mod_union_table != nullptr);
+  for (const auto& space : immune_spaces_.GetSpaces()) {
+    const char* name = space->IsZygoteSpace()
+        ? "UpdateAndMarkZygoteModUnionTable"
+        : "UpdateAndMarkImageModUnionTable";
+    DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
+    TimingLogger::ScopedTiming t(name, GetTimings());
+    accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space);
+    if (mod_union_table != nullptr) {
       mod_union_table->UpdateAndMarkReferences(this);
+    } else {
+      // No mod-union table; scan all the live bits instead. This can only occur for app images.
+      space->GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
+                                               reinterpret_cast<uintptr_t>(space->End()),
+                                               ScanObjectVisitor(this));
     }
   }
 }
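
The new fallback path above scans every live bit in the space when there is no mod-union table (the app-image case), handing each marked object to `ScanObjectVisitor`. A toy sketch of the visit-marked-bits pattern such a bitmap walk uses (scale and names are illustrative, not ART's `ContinuousSpaceBitmap`):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy live bitmap: one bit per slot, visited in ascending order.
class ToyBitmap {
 public:
  explicit ToyBitmap(size_t slots) : words_((slots + 63) / 64, 0) {}

  void Set(size_t slot) { words_[slot / 64] |= uint64_t{1} << (slot % 64); }

  template <typename Visitor>
  void VisitMarked(Visitor&& visitor) const {
    for (size_t i = 0; i < words_.size(); ++i) {
      uint64_t word = words_[i];
      while (word != 0) {  // Pop set bits one at a time.
        size_t bit = static_cast<size_t>(__builtin_ctzll(word));
        visitor(i * 64 + bit);
        word &= word - 1;  // Clear the lowest set bit.
      }
    }
  }

 private:
  std::vector<uint64_t> words_;
};

int main() {
  ToyBitmap bitmap(256);
  bitmap.Set(3);
  bitmap.Set(64);
  bitmap.Set(200);
  bitmap.VisitMarked([](size_t slot) { std::cout << "live slot " << slot << "\n"; });
  return 0;
}
```
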
@@ -460,7 +485,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
       ++mark_immune_count_;
     }
@@ -501,7 +526,7 @@
     // Verify all the objects have the correct pointer installed.
     obj->AssertReadBarrierPointer();
   }
-  if (immune_region_.ContainsObject(obj)) {
+  if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
     return false;
   }
@@ -606,26 +631,6 @@
       this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving));
 }
 
-class ScanObjectVisitor {
- public:
-  explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
-      : mark_sweep_(mark_sweep) {}
-
-  void operator()(mirror::Object* obj) const
-      ALWAYS_INLINE
-      REQUIRES(Locks::heap_bitmap_lock_)
-      SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (kCheckLocks) {
-      Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
-      Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
-    }
-    mark_sweep_->ScanObject(obj);
-  }
-
- private:
-  MarkSweep* const mark_sweep_;
-};
-
 class DelayReferenceReferentVisitor {
  public:
   explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {}
@@ -1193,7 +1198,8 @@
   std::vector<space::ContinuousSpace*> sweep_spaces;
   space::ContinuousSpace* non_moving_space = nullptr;
   for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
-    if (space->IsAllocSpace() && !immune_region_.ContainsSpace(space) &&
+    if (space->IsAllocSpace() &&
+        !immune_spaces_.ContainsSpace(space) &&
         space->GetLiveBitmap() != nullptr) {
       if (space == heap_->GetNonMovingSpace()) {
         non_moving_space = space;
@@ -1422,7 +1428,7 @@
 }
 
 inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) {
-  if (immune_region_.ContainsObject(object)) {
+  if (immune_spaces_.IsInImmuneRegion(object)) {
     return object;
   }
   if (current_space_bitmap_->HasAddress(object)) {
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 8f7df78..245f96b 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -26,7 +26,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 
@@ -314,8 +314,9 @@
 
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune range is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked. Immune spaces that aren't in the
+  // immune region are handled by the normal marking logic.
+  ImmuneSpaces immune_spaces_;
 
   // Parallel finger.
   AtomicInteger atomic_finger_;
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 15f782a..9847794 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -39,7 +39,7 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
       CHECK(space->IsZygoteSpace());
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     }
   }
 }
diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h
index 06d20f5..12cf3db 100644
--- a/runtime/gc/collector/semi_space-inl.h
+++ b/runtime/gc/collector/semi_space-inl.h
@@ -74,7 +74,7 @@
       MarkStackPush(forward_address);
     }
     obj_ptr->Assign(forward_address);
-  } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) {
+  } else if (!collect_from_space_only_ && !immune_spaces_.IsInImmuneRegion(obj)) {
     BitmapSetSlowPathVisitor visitor(this);
     if (!mark_bitmap_->Set(obj, visitor)) {
       // This object was not previously marked.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 7f57f30..e9497a2 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -66,8 +66,9 @@
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
-      CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space;
+      immune_spaces_.AddSpace(space);
     } else if (space->GetLiveBitmap() != nullptr) {
+      // TODO: We can probably also add this space to the immune region.
       if (space == to_space_ || collect_from_space_only_) {
         if (collect_from_space_only_) {
           // Bind the bitmaps of the main free list space and the non-moving space we are doing a
@@ -144,7 +145,7 @@
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
-  immune_region_.Reset();
+  immune_spaces_.Reset();
   is_large_object_space_immune_ = false;
   saved_bytes_ = 0;
   bytes_moved_ = 0;
@@ -376,7 +377,13 @@
           << "generational_=" << generational_ << " "
           << "collect_from_space_only_=" << collect_from_space_only_;
       accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space);
-      CHECK_EQ(rem_set != nullptr, kUseRememberedSet);
+      if (kUseRememberedSet) {
+        // App images currently do not have remembered sets.
+        DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) ||
+               rem_set != nullptr);
+      } else {
+        DCHECK(rem_set == nullptr);
+      }
       if (rem_set != nullptr) {
         TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings());
         rem_set->UpdateAndMarkReferences(from_space_, this);
@@ -767,7 +774,8 @@
   if (from_space_->HasAddress(obj)) {
     // Returns either the forwarding address or null.
     return GetForwardingAddressInFromSpace(obj);
-  } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) ||
+  } else if (collect_from_space_only_ ||
+             immune_spaces_.IsInImmuneRegion(obj) ||
              to_space_->HasAddress(obj)) {
     return obj;  // Already forwarded, must be marked.
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index b9246ca..a905904 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -25,7 +25,7 @@
 #include "garbage_collector.h"
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
-#include "immune_region.h"
+#include "immune_spaces.h"
 #include "mirror/object_reference.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -201,8 +201,8 @@
   // object.
   accounting::ObjectStack* mark_stack_;
 
-  // Immune region, every object inside the immune region is assumed to be marked.
-  ImmuneRegion immune_region_;
+  // Every object inside the immune spaces is assumed to be marked.
+  ImmuneSpaces immune_spaces_;
 
   // If true, the large object space is immune.
   bool is_large_object_space_immune_;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ab93142..da9a79e 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -233,7 +233,8 @@
       backtrace_lock_(nullptr),
       seen_backtrace_count_(0u),
       unique_backtrace_count_(0u),
-      gc_disabled_for_shutdown_(false) {
+      gc_disabled_for_shutdown_(false),
+      boot_image_space_(nullptr) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
@@ -262,15 +263,16 @@
   if (!image_file_name.empty()) {
     ATRACE_BEGIN("ImageSpace::Create");
     std::string error_msg;
-    auto* image_space = space::ImageSpace::Create(image_file_name.c_str(), image_instruction_set,
+    boot_image_space_ = space::ImageSpace::Create(image_file_name.c_str(),
+                                                  image_instruction_set,
                                                   &error_msg);
     ATRACE_END();
-    if (image_space != nullptr) {
-      AddSpace(image_space);
+    if (boot_image_space_ != nullptr) {
+      AddSpace(boot_image_space_);
       // Oat files referenced by image files immediately follow them in memory, ensure alloc space
       // isn't going to get in the middle
-      uint8_t* oat_file_end_addr = image_space->GetImageHeader().GetOatFileEnd();
-      CHECK_GT(oat_file_end_addr, image_space->End());
+      uint8_t* oat_file_end_addr = boot_image_space_->GetImageHeader().GetOatFileEnd();
+      CHECK_GT(oat_file_end_addr, boot_image_space_->End());
       requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     } else {
       LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
@@ -454,11 +456,11 @@
     rb_table_.reset(new accounting::ReadBarrierTable());
     DCHECK(rb_table_->IsAllCleared());
   }
-  if (GetImageSpace() != nullptr) {
+  if (GetBootImageSpace() != nullptr) {
     // Don't add the image mod union table if we are running without an image, this can crash if
     // we use the CardCache implementation.
     accounting::ModUnionTable* mod_union_table = new accounting::ModUnionTableToZygoteAllocspace(
-        "Image mod-union table", this, GetImageSpace());
+        "Image mod-union table", this, GetBootImageSpace());
     CHECK(mod_union_table != nullptr) << "Failed to create image mod-union table";
     AddModUnionTable(mod_union_table);
   }
@@ -523,12 +525,12 @@
       garbage_collectors_.push_back(mark_compact_collector_);
     }
   }
-  if (GetImageSpace() != nullptr && non_moving_space_ != nullptr &&
+  if (GetBootImageSpace() != nullptr && non_moving_space_ != nullptr &&
       (is_zygote || separate_non_moving_space || foreground_collector_type_ == kCollectorTypeGSS)) {
     // Check that there's no gap between the image space and the non moving space so that the
     // immune region won't break (eg. due to a large object allocated in the gap). This is only
     // required when we're the zygote or using GSS.
-    bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(),
+    bool no_gap = MemMap::CheckNoGaps(GetBootImageSpace()->GetMemMap(),
                                       non_moving_space_->GetMemMap());
     if (!no_gap) {
       PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
@@ -748,15 +750,6 @@
   return true;
 }
 
-bool Heap::HasImageSpace() const {
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      return true;
-    }
-  }
-  return false;
-}
-
 void Heap::IncrementDisableMovingGC(Thread* self) {
   // Need to do this holding the lock to prevent races where the GC is about to run / running when
   // we attempt to disable it.
@@ -1164,6 +1157,7 @@
   STLDeleteElements(&continuous_spaces_);
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
+  delete thread_flip_lock_;
   delete pending_task_lock_;
   delete backtrace_lock_;
   if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
@@ -1208,13 +1202,8 @@
   return FindDiscontinuousSpaceFromObject(obj, fail_ok);
 }
 
-space::ImageSpace* Heap::GetImageSpace() const {
-  for (const auto& space : continuous_spaces_) {
-    if (space->IsImageSpace()) {
-      return space->AsImageSpace();
-    }
-  }
-  return nullptr;
+space::ImageSpace* Heap::GetBootImageSpace() const {
+  return boot_image_space_;
 }
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index cc48172..e23b1a3 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -580,9 +580,9 @@
   // Unbind any bound bitmaps.
   void UnBindBitmaps() REQUIRES(Locks::heap_bitmap_lock_);
 
-  // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
-  // Assumes there is only one image space.
-  space::ImageSpace* GetImageSpace() const;
+  // Returns the boot image space. There may be multiple image spaces, but there is only one boot
+  // image space.
+  space::ImageSpace* GetBootImageSpace() const;
 
   // Permenantly disable moving garbage collection.
   void DisableMovingGc() REQUIRES(!*gc_complete_lock_);
@@ -660,7 +660,9 @@
   void RemoveRememberedSet(space::Space* space);
 
   bool IsCompilingBoot() const;
-  bool HasImageSpace() const;
+  bool HasImageSpace() const {
+    return boot_image_space_ != nullptr;
+  }
 
   ReferenceProcessor* GetReferenceProcessor() {
     return reference_processor_.get();
@@ -1320,6 +1322,9 @@
   // allocating.
   bool gc_disabled_for_shutdown_ GUARDED_BY(gc_complete_lock_);
 
+  // Boot image space.
+  space::ImageSpace* boot_image_space_;
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 77f606d..e754a52 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -20,6 +20,8 @@
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "memory_tool_malloc_space-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -318,10 +320,17 @@
 
 // Implement the dlmalloc morecore callback.
 void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) {
-  Heap* heap = Runtime::Current()->GetHeap();
+  Runtime* runtime = Runtime::Current();
+  Heap* heap = runtime->GetHeap();
   ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace();
   // Support for multiple DlMalloc provided by a slow path.
   if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) {
+    if (LIKELY(runtime->GetJit() != nullptr)) {
+      jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache();
+      if (code_cache->OwnsSpace(mspace)) {
+        return code_cache->MoreCore(mspace, increment);
+      }
+    }
     dlmalloc_space = nullptr;
     for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
       if (space->IsDlMallocSpace()) {
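
The morecore hook above now consults the JIT code cache first: if the JIT owns the `mspace` being grown, the request is routed to the code cache's own `MoreCore`; only otherwise does the old DlMallocSpace scan run. A small sketch of that ownership-dispatch pattern (the registry type and names are illustrative, not ART's):

```cpp
#include <cstdint>
#include <cstdio>
#include <map>

// Toy "grow this allocator's arena" callbacks keyed by the opaque mspace
// handle, standing in for JitCodeCache::OwnsSpace/MoreCore and the
// DlMallocSpace fallback scan.
using MoreCoreFn = void* (*)(std::intptr_t increment);

std::map<void*, MoreCoreFn>& Registry() {
  static std::map<void*, MoreCoreFn> registry;
  return registry;
}

void* JitMoreCore(std::intptr_t increment) {
  std::printf("jit code cache grows by %ld\n", static_cast<long>(increment));
  return nullptr;  // A real implementation would return the new break.
}

void* MoreCore(void* mspace, std::intptr_t increment) {
  auto it = Registry().find(mspace);
  if (it != Registry().end()) {
    return it->second(increment);  // Owner found: route the request to it.
  }
  return nullptr;  // Fallback path (the heap space scan in the real code).
}

int main() {
  int dummy_mspace;  // Any address works as an opaque handle for the sketch.
  Registry()[&dummy_mspace] = JitMoreCore;
  MoreCore(&dummy_mspace, 4096);
  return 0;
}
```
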
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index ce64b10..e2b2431 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -58,10 +58,7 @@
   CHECK_ALIGNED(max_delta, kPageSize);
   CHECK_LT(min_delta, max_delta);
 
-  std::default_random_engine generator;
-  generator.seed(NanoTime() * getpid());
-  std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta);
-  int32_t r = distribution(generator);
+  int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta);
   if (r % 2 == 0) {
     r = RoundUp(r, kPageSize);
   } else {
@@ -709,19 +706,32 @@
   }
 
   // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(
-      image_header.GetImageBegin(), image_header.GetImageSize(),
-      PROT_READ | PROT_WRITE, MAP_PRIVATE, file->Fd(), 0, false, image_filename, error_msg));
-  if (map.get() == nullptr) {
+  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
+                                                       image_header.GetImageSize(),
+                                                       PROT_READ | PROT_WRITE,
+                                                       MAP_PRIVATE,
+                                                       file->Fd(),
+                                                       0,
+                                                       /*low_4gb*/false,
+                                                       /*reuse*/false,
+                                                       image_filename,
+                                                       error_msg));
+  if (map == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
   }
   CHECK_EQ(image_header.GetImageBegin(), map->Begin());
   DCHECK_EQ(0, memcmp(&image_header, map->Begin(), sizeof(ImageHeader)));
 
-  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(
-      nullptr, bitmap_section.Size(), PROT_READ, MAP_PRIVATE, file->Fd(),
-      bitmap_section.Offset(), false, image_filename, error_msg));
+  std::unique_ptr<MemMap> image_map(MemMap::MapFileAtAddress(nullptr,
+                                                             bitmap_section.Size(),
+                                                             PROT_READ, MAP_PRIVATE,
+                                                             file->Fd(),
+                                                             bitmap_section.Offset(),
+                                                             /*low_4gb*/false,
+                                                             /*reuse*/false,
+                                                             image_filename,
+                                                             error_msg));
   if (image_map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
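
The relocation-delta hunk above replaces the inline `std::default_random_engine` setup with `GetRandomNumber` but keeps the rounding scheme: draw a delta in `[min_delta, max_delta]`, then use the draw's parity to round up or down to a page boundary so neither direction is favored. A standalone sketch of that logic, using `<random>` directly and assuming a 4 KiB page size:

```cpp
#include <cstdint>
#include <iostream>
#include <random>

constexpr int64_t kPageSize = 4096;

// Round toward -inf / +inf to a page boundary (two's complement masking
// rounds negative values down correctly).
int64_t RoundDown(int64_t x) { return x & ~(kPageSize - 1); }
int64_t RoundUp(int64_t x) { return RoundDown(x + kPageSize - 1); }

int64_t RandomPageAlignedDelta(int64_t min_delta, int64_t max_delta) {
  std::default_random_engine generator(std::random_device{}());
  std::uniform_int_distribution<int64_t> distribution(min_delta, max_delta);
  int64_t r = distribution(generator);
  // Use the draw's parity to pick the rounding direction, as the patch does.
  return (r % 2 == 0) ? RoundUp(r) : RoundDown(r);
}

int main() {
  int64_t delta = RandomPageAlignedDelta(-64 * kPageSize, 64 * kPageSize);
  std::cout << "delta=" << delta << " aligned=" << (delta % kPageSize == 0) << "\n";
  return 0;
}
```
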
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 9920742..babd672 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -119,7 +119,22 @@
                                 bool* has_data,
                                 bool *is_global_cache);
 
- private:
+  // Return the end of the image, which includes non-heap objects such as ArtMethods and ArtFields.
+  uint8_t* GetImageEnd() const {
+    return Begin() + GetImageHeader().GetImageSize();
+  }
+
+  // Return the start of the associated oat file.
+  uint8_t* GetOatFileBegin() const {
+    return GetImageHeader().GetOatFileBegin();
+  }
+
+  // Return the end of the associated oat file.
+  uint8_t* GetOatFileEnd() const {
+    return GetImageHeader().GetOatFileEnd();
+  }
+
+ protected:
   // Tries to initialize an ImageSpace from the given image path,
   // returning null on error.
   //
@@ -157,6 +172,7 @@
 
   const std::string image_location_;
 
+ private:
   DISALLOW_COPY_AND_ASSIGN(ImageSpace);
 };
 
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 477e67b..3734bcc 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -198,7 +198,7 @@
   ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  // Root visitors take pointers to root_ and place the min CompressedReference** arrays. We use a
+  // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
   // CompressedReference<mirror::Object> here since it violates strict aliasing requirements to
   // cast CompressedReference<MirrorType>* to CompressedReference<mirror::Object>*.
   mutable mirror::CompressedReference<mirror::Object> root_;
diff --git a/runtime/globals.h b/runtime/globals.h
index 987a94e..e7ea6f3 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -87,8 +87,18 @@
 #endif
 
 static constexpr bool kUseBakerOrBrooksReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier;
-static constexpr bool kUseReadBarrier = kUseBakerReadBarrier || kUseBrooksReadBarrier ||
-    kUseTableLookupReadBarrier;
+static constexpr bool kUseReadBarrier =
+    kUseBakerReadBarrier || kUseBrooksReadBarrier || kUseTableLookupReadBarrier;
+
+// Debugging flag that forces the generation of read barriers, but
+// does not trigger the use of the concurrent copying GC.
+//
+// TODO: Remove this flag when the read barriers compiler
+// instrumentation is completed.
+static constexpr bool kForceReadBarrier = false;
+// TODO: Likewise, remove this flag when kForceReadBarrier is removed
+// and replace it with kUseReadBarrier.
+static constexpr bool kEmitCompilerReadBarrier = kForceReadBarrier || kUseReadBarrier;
 
 // If true, references within the heap are poisoned (negated).
 #ifdef USE_HEAP_POISONING
diff --git a/runtime/image.h b/runtime/image.h
index 20e4159..555cf5d 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -84,7 +84,7 @@
         image_roots_(0U), pointer_size_(0U), compile_pic_(0) {}
 
   ImageHeader(uint32_t image_begin,
-              uint32_t image_size_,
+              uint32_t image_size,
               ImageSection* sections,
               uint32_t image_roots,
               uint32_t oat_checksum,
@@ -93,7 +93,7 @@
               uint32_t oat_data_end,
               uint32_t oat_file_end,
               uint32_t pointer_size,
-              bool compile_pic_);
+              bool compile_pic);
 
   bool IsValid() const;
   const char* GetMagic() const;
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index e937397..bc2c197 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -66,13 +66,20 @@
 
 
 Instrumentation::Instrumentation()
-    : instrumentation_stubs_installed_(false), entry_exit_stubs_installed_(false),
+    : instrumentation_stubs_installed_(false),
+      entry_exit_stubs_installed_(false),
       interpreter_stubs_installed_(false),
-      interpret_only_(false), forced_interpret_only_(false),
-      have_method_entry_listeners_(false), have_method_exit_listeners_(false),
-      have_method_unwind_listeners_(false), have_dex_pc_listeners_(false),
-      have_field_read_listeners_(false), have_field_write_listeners_(false),
-      have_exception_caught_listeners_(false), have_backward_branch_listeners_(false),
+      interpret_only_(false),
+      forced_interpret_only_(false),
+      have_method_entry_listeners_(false),
+      have_method_exit_listeners_(false),
+      have_method_unwind_listeners_(false),
+      have_dex_pc_listeners_(false),
+      have_field_read_listeners_(false),
+      have_field_write_listeners_(false),
+      have_exception_caught_listeners_(false),
+      have_backward_branch_listeners_(false),
+      have_invoke_virtual_or_interface_listeners_(false),
       deoptimized_methods_lock_("deoptimized methods lock"),
       deoptimization_enabled_(false),
       interpreter_handler_table_(kMainHandlerTable),
@@ -101,7 +108,7 @@
 }
 
 void Instrumentation::InstallStubsForMethod(ArtMethod* method) {
-  if (method->IsAbstract() || method->IsProxyMethod()) {
+  if (!method->IsInvokable() || method->IsProxyMethod()) {
     // Do not change stubs for these methods.
     return;
   }
@@ -297,7 +304,9 @@
 
 // Removes the instrumentation exit pc as the return PC for every quick frame.
 static void InstrumentationRestoreStack(Thread* thread, void* arg)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+    REQUIRES(Locks::mutator_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+
   struct RestoreStackVisitor FINAL : public StackVisitor {
     RestoreStackVisitor(Thread* thread_in, uintptr_t instrumentation_exit_pc,
                         Instrumentation* instrumentation)
@@ -599,9 +608,11 @@
       empty = IsDeoptimizedMethodsEmpty();  // Avoid lock violation.
     }
     if (empty) {
-      instrumentation_stubs_installed_ = false;
       MutexLock mu(self, *Locks::thread_list_lock_);
       Runtime::Current()->GetThreadList()->ForEach(InstrumentationRestoreStack, this);
+      // Only do this after restoring, as walking the stack when restoring will see
+      // the instrumentation exit pc.
+      instrumentation_stubs_installed_ = false;
     }
   }
 }
@@ -723,7 +734,7 @@
 void Instrumentation::Deoptimize(ArtMethod* method) {
   CHECK(!method->IsNative());
   CHECK(!method->IsProxyMethod());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   Thread* self = Thread::Current();
   {
@@ -746,7 +757,7 @@
 void Instrumentation::Undeoptimize(ArtMethod* method) {
   CHECK(!method->IsNative());
   CHECK(!method->IsProxyMethod());
-  CHECK(!method->IsAbstract());
+  CHECK(method->IsInvokable());
 
   Thread* self = Thread::Current();
   bool empty;
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index f4658d5..e2e4782 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -187,7 +187,7 @@
   if (image_added_to_intern_table_) {
     return nullptr;
   }
-  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetImageSpace();
+  gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetBootImageSpace();
   if (image == nullptr) {
     return nullptr;  // No image present.
   }
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7c0594a..d686f74 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -262,7 +262,7 @@
 
 static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
                              ShadowFrame& shadow_frame, JValue result_register) {
-  DCHECK(!shadow_frame.GetMethod()->IsAbstract());
+  DCHECK(shadow_frame.GetMethod()->IsInvokable());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
 
@@ -318,9 +318,9 @@
   if (code_item != nullptr) {
     num_regs =  code_item->registers_size_;
     num_ins = code_item->ins_size_;
-  } else if (method->IsAbstract()) {
+  } else if (!method->IsInvokable()) {
     self->EndAssertNoThreadSuspension(old_cause);
-    ThrowAbstractMethodError(method);
+    method->ThrowInvocationTimeError();
     return;
   } else {
     DCHECK(method->IsNative());
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index c8650c4..9f6699f 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -141,8 +141,9 @@
 
   if (UNLIKELY(called_method == nullptr)) {
     // The shadow frame should already be pushed, so we don't need to update it.
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
+    // An exception is now pending; the interpreter will see it and unwind.
     // TODO(iam): Also handle the case when the method is non-static, what error do we throw?
     // TODO(iam): Also make sure that ACC_LAMBDA is set.
   } else if (UNLIKELY(called_method->GetCodeItem() == nullptr)) {
@@ -617,8 +618,8 @@
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
     result->SetJ(0);
     return false;
   } else {
@@ -656,8 +657,8 @@
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(called_method->IsAbstract())) {
-    ThrowAbstractMethodError(called_method);
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
     result->SetJ(0);
     return false;
   } else {
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index bf95a0e..c9831e6 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -66,6 +66,11 @@
     }                                                                                           \
   } while (false)
 
+#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \
+  do { \
+    instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \
+  } while (false)
+
 static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
   DCHECK(inst->IsExperimental());
   return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas);
@@ -542,6 +547,7 @@
         PREAMBLE();
         int8_t offset = inst->VRegA_10t(inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -551,6 +557,7 @@
         PREAMBLE();
         int16_t offset = inst->VRegA_20t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -560,6 +567,7 @@
         PREAMBLE();
         int32_t offset = inst->VRegA_30t();
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -569,6 +577,7 @@
         PREAMBLE();
         int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -578,6 +587,7 @@
         PREAMBLE();
         int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data);
         if (IsBackwardBranch(offset)) {
+          BACKWARD_BRANCH_INSTRUMENTATION(offset);
           self->AllowThreadSuspension();
         }
         inst = inst->RelativeAt(offset);
@@ -681,6 +691,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -695,6 +706,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -709,6 +721,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -723,6 +736,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -737,6 +751,7 @@
         shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -751,6 +766,7 @@
             shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) {
           int16_t offset = inst->VRegC_22t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -764,6 +780,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -777,6 +794,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -790,6 +808,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -803,6 +822,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -816,6 +836,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
@@ -829,6 +850,7 @@
         if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) {
           int16_t offset = inst->VRegB_21t();
           if (IsBackwardBranch(offset)) {
+            BACKWARD_BRANCH_INSTRUMENTATION(offset);
             self->AllowThreadSuspension();
           }
           inst = inst->RelativeAt(offset);
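
Note: every taken backward branch now notifies the instrumentation before the
suspend check, which is what feeds loop hotness into the JIT (see the
BackwardBranch listener in jit_instrumentation.cc below). The guard itself is
just a sign check on the relative dex pc offset; a sketch of the helper as it
is assumed to exist in the interpreter:

    static inline bool IsBackwardBranch(int32_t branch_offset) {
      return branch_offset <= 0;
    }
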
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index b5e28e9..7cc05f7 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -56,15 +56,17 @@
 class SharedLibrary {
  public:
   SharedLibrary(JNIEnv* env, Thread* self, const std::string& path, void* handle,
-                jobject class_loader)
+                jobject class_loader, void* class_loader_allocator)
       : path_(path),
         handle_(handle),
         needs_native_bridge_(false),
         class_loader_(env->NewWeakGlobalRef(class_loader)),
+        class_loader_allocator_(class_loader_allocator),
         jni_on_load_lock_("JNI_OnLoad lock"),
         jni_on_load_cond_("JNI_OnLoad condition variable", jni_on_load_lock_),
         jni_on_load_thread_id_(self->GetThreadId()),
         jni_on_load_result_(kPending) {
+    CHECK(class_loader_allocator_ != nullptr);
   }
 
   ~SharedLibrary() {
@@ -78,6 +80,10 @@
     return class_loader_;
   }
 
+  const void* GetClassLoaderAllocator() const {
+    return class_loader_allocator_;
+  }
+
   const std::string& GetPath() const {
     return path_;
   }
@@ -169,6 +175,9 @@
   // The ClassLoader this library is associated with, a weak global JNI reference that is
   // created/deleted with the scope of the library.
   const jweak class_loader_;
+  // Used for equality checks on class loaders, so we can avoid decoding the weak root and the
+  // read barriers that would interfere with class unloading.
+  const void* class_loader_allocator_;
 
   // Guards remaining items.
   Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -224,11 +233,15 @@
       SHARED_REQUIRES(Locks::mutator_lock_) {
     std::string jni_short_name(JniShortName(m));
     std::string jni_long_name(JniLongName(m));
-    const mirror::ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
+    mirror::ClassLoader* const declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
     ScopedObjectAccessUnchecked soa(Thread::Current());
+    void* const declaring_class_loader_allocator =
+        Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(declaring_class_loader);
+    CHECK(declaring_class_loader_allocator != nullptr);
     for (const auto& lib : libraries_) {
       SharedLibrary* const library = lib.second;
-      if (soa.Decode<mirror::ClassLoader*>(library->GetClassLoader()) != declaring_class_loader) {
+      // Use the allocator address for class loader equality to avoid unnecessary weak root decode.
+      if (library->GetClassLoaderAllocator() != declaring_class_loader_allocator) {
         // We only search libraries loaded by the appropriate ClassLoader.
         continue;
       }
@@ -269,7 +282,7 @@
         // If class_loader is a null jobject then it is the boot class loader. We should not unload
         // the native libraries of the boot class loader.
         if (class_loader != nullptr &&
-            soa.Decode<mirror::ClassLoader*>(class_loader) == nullptr) {
+            soa.Self()->IsJWeakCleared(class_loader)) {
           void* const sym = library->FindSymbol("JNI_OnUnload", nullptr);
           if (sym == nullptr) {
             VLOG(jni) << "[No JNI_OnUnload found in \"" << library->GetPath() << "\"]";
@@ -667,6 +680,19 @@
   return weak_globals_.SynchronizedGet(ref);
 }
 
+bool JavaVMExt::IsWeakGlobalCleared(Thread* self, IndirectRef ref) {
+  DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
+  MutexLock mu(self, weak_globals_lock_);
+  while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    weak_globals_add_condition_.WaitHoldingLocks(self);
+  }
+  // When just checking whether a weak ref has been cleared, avoid triggering the read barrier in decode
+  // (DecodeWeakGlobal) so that we won't accidentally mark the object alive. Since the cleared
+  // sentinel is a non-moving object, we can compare the ref to it without the read barrier and
+  // decide if it's cleared.
+  return Runtime::Current()->IsClearedJniWeakGlobal(weak_globals_.Get<kWithoutReadBarrier>(ref));
+}
+
 void JavaVMExt::UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result) {
   MutexLock mu(self, weak_globals_lock_);
   weak_globals_.Update(ref, result);
@@ -703,8 +729,19 @@
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     library = libraries_->Get(path);
   }
+  void* class_loader_allocator = nullptr;
+  {
+    ScopedObjectAccess soa(env);
+    // As the incoming class loader is reachable/alive during the call of this function,
+    // it's okay to decode it without worrying about unexpectedly marking it alive.
+    mirror::ClassLoader* loader = soa.Decode<mirror::ClassLoader*>(class_loader);
+    class_loader_allocator =
+        Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(loader);
+    CHECK(class_loader_allocator != nullptr);
+  }
   if (library != nullptr) {
-    if (env->IsSameObject(library->GetClassLoader(), class_loader) == JNI_FALSE) {
+    // Use the allocator pointers for class loader equality to avoid unnecessary weak root decode.
+    if (library->GetClassLoaderAllocator() != class_loader_allocator) {
       // The library will be associated with class_loader. The JNI
       // spec says we can't load the same library into more than one
       // class loader.
@@ -765,7 +802,7 @@
   {
     // Create SharedLibrary ahead of taking the libraries lock to maintain lock ordering.
     std::unique_ptr<SharedLibrary> new_library(
-        new SharedLibrary(env, self, path, handle, class_loader));
+        new SharedLibrary(env, self, path, handle, class_loader, class_loader_allocator));
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     library = libraries_->Get(path);
     if (library == nullptr) {  // We won race to get libraries_lock.
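
Note: soa.Self()->IsJWeakCleared(class_loader) checks whether the weak global
was cleared without decoding it, so the check cannot resurrect the loader via a
read barrier. Only the JavaVMExt side appears in this patch; a plausible
forwarding shim on Thread (names assumed):

    bool Thread::IsJWeakCleared(jweak obj) const {
      DCHECK(obj != nullptr);
      IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
      DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
      return tlsPtr_.jni_env->vm->IsWeakGlobalCleared(const_cast<Thread*>(this), ref);
    }
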
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index c1fbdc0..618f6fa 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -149,6 +149,11 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!weak_globals_lock_);
 
+  // Checks whether the weak global ref has been cleared by the GC, without decoding it
+  // (and thus without a read barrier).
+  bool IsWeakGlobalCleared(Thread* self, IndirectRef ref)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!weak_globals_lock_);
+
   Mutex& WeakGlobalsLock() RETURN_CAPABILITY(weak_globals_lock_) {
     return weak_globals_lock_;
   }
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index df6936b..f1f4a03 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -745,6 +745,15 @@
   return ERR_NONE;
 }
 
+// Default implementation for IDEs relying on this command.
+static JdwpError M_IsObsolete(JdwpState*, Request* request, ExpandBuf* reply)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  request->ReadRefTypeId();  // unused reference type ID
+  request->ReadMethodId();   // unused method ID
+  expandBufAdd1(reply, false);  // a method is never obsolete.
+  return ERR_NONE;
+}
+
 /*
  * Given an object reference, return the runtime type of the object
  * (class or array).
@@ -1477,7 +1486,7 @@
   { 6,    1,  M_LineTable,                "Method.LineTable" },
   { 6,    2,  M_VariableTable,            "Method.VariableTable" },
   { 6,    3,  M_Bytecodes,                "Method.Bytecodes" },
-  { 6,    4,  nullptr,                    "Method.IsObsolete" },
+  { 6,    4,  M_IsObsolete,               "Method.IsObsolete" },
   { 6,    5,  M_VariableTableWithGeneric, "Method.VariableTableWithGeneric" },
 
   /* Field command set (8) */
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 5afd28e..27a0e2d 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -24,9 +24,10 @@
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
+#include "oat_file_manager.h"
+#include "offline_profiling_info.h"
 #include "runtime.h"
 #include "runtime_options.h"
-#include "thread_list.h"
 #include "utils.h"
 
 namespace art {
@@ -35,14 +36,18 @@
 JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
   auto* jit_options = new JitOptions;
   jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT);
-  jit_options->code_cache_capacity_ =
-      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
+  jit_options->code_cache_initial_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
+  jit_options->code_cache_max_capacity_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
   jit_options->compile_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
   jit_options->warmup_threshold_ =
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
+  jit_options->save_profiling_info_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);
   return jit_options;
 }
 
@@ -70,13 +75,19 @@
   if (!jit->LoadCompiler(error_msg)) {
     return nullptr;
   }
-  jit->code_cache_.reset(JitCodeCache::Create(options->GetCodeCacheCapacity(), error_msg));
+  jit->code_cache_.reset(JitCodeCache::Create(
+      options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg));
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
-  LOG(INFO) << "JIT created with code_cache_capacity="
-      << PrettySize(options->GetCodeCacheCapacity())
-      << " compile_threshold=" << options->GetCompileThreshold();
+  jit->offline_profile_info_.reset(nullptr);
+  if (options->GetSaveProfilingInfo()) {
+    jit->offline_profile_info_.reset(new OfflineProfilingInfo());
+  }
+  LOG(INFO) << "JIT created with initial_capacity="
+      << PrettySize(options->GetCodeCacheInitialCapacity())
+      << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
+      << ", compile_threshold=" << options->GetCompileThreshold();
   return jit.release();
 }
 
@@ -145,7 +156,34 @@
 
 void Jit::DeleteThreadPool() {
   if (instrumentation_cache_.get() != nullptr) {
-    instrumentation_cache_->DeleteThreadPool();
+    instrumentation_cache_->DeleteThreadPool(Thread::Current());
+  }
+}
+
+void Jit::SaveProfilingInfo(const std::string& filename) {
+  if (offline_profile_info_ == nullptr) {
+    return;
+  }
+  // Note that we can't check the PrimaryOatFile when constructing the offline_profile_info_
+  // because it becomes known to the Runtime after we create and initialize the JIT.
+  const OatFile* primary_oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile();
+  if (primary_oat_file == nullptr) {
+    LOG(WARNING) << "Couldn't find a primary oat file when trying to save profile info to "
+                 << filename;
+    return;
+  }
+
+  uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
+  if (offline_profile_info_->NeedsSaving(last_update_ns)) {
+    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    std::set<ArtMethod*> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      code_cache_->GetCompiledArtMethods(primary_oat_file, methods);
+    }
+    offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods);
+  } else {
+    VLOG(profiler) << "No need to save profiling information to: " << filename;
   }
 }
 
@@ -164,16 +202,8 @@
 
 void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
   CHECK_GT(compile_threshold, 0U);
-  ScopedSuspendAll ssa(__FUNCTION__);
-  // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
-  // something.
   instrumentation_cache_.reset(
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
-  Runtime::Current()->GetInstrumentation()->AddListener(
-      new jit::JitInstrumentationListener(instrumentation_cache_.get()),
-      instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
-      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
 }
 
 }  // namespace jit
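
Note: nothing in this file schedules Jit::SaveProfilingInfo; it is meant to be
driven from outside the JIT. A hypothetical caller (the trigger and the path
are illustrative only, not part of this patch):

    Runtime* runtime = Runtime::Current();
    if (runtime->GetJit() != nullptr) {
      // E.g. when the framework asks the runtime to flush profile data.
      runtime->GetJit()->SaveProfilingInfo("/data/misc/profiles/example.prof");
    }
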
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 1f89f9b..630eba3 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -26,6 +26,7 @@
 #include "gc_root.h"
 #include "jni.h"
 #include "object_callbacks.h"
+#include "offline_profiling_info.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -71,6 +72,8 @@
     return instrumentation_cache_.get();
   }
 
+  void SaveProfilingInfo(const std::string& filename);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -90,6 +93,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
+  std::unique_ptr<OfflineProfilingInfo> offline_profile_info_;
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
@@ -102,28 +106,44 @@
   size_t GetWarmupThreshold() const {
     return warmup_threshold_;
   }
-  size_t GetCodeCacheCapacity() const {
-    return code_cache_capacity_;
+  size_t GetCodeCacheInitialCapacity() const {
+    return code_cache_initial_capacity_;
+  }
+  size_t GetCodeCacheMaxCapacity() const {
+    return code_cache_max_capacity_;
   }
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  bool GetSaveProfilingInfo() const {
+    return save_profiling_info_;
+  }
   bool UseJIT() const {
     return use_jit_;
   }
   void SetUseJIT(bool b) {
     use_jit_ = b;
   }
+  void SetSaveProfilingInfo(bool b) {
+    save_profiling_info_ = b;
+  }
 
  private:
   bool use_jit_;
-  size_t code_cache_capacity_;
+  size_t code_cache_initial_capacity_;
+  size_t code_cache_max_capacity_;
   size_t compile_threshold_;
   size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
+  bool save_profiling_info_;
 
-  JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
-      dump_info_on_shutdown_(false) { }
+  JitOptions()
+      : use_jit_(false),
+        code_cache_initial_capacity_(0),
+        code_cache_max_capacity_(0),
+        compile_threshold_(0),
+        dump_info_on_shutdown_(false),
+        save_profiling_info_(false) { }
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
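
Note: putting the new options together, a minimal sketch of constructing the
cache with the split capacities (constants from jit_code_cache.h below):

    std::string error_msg;
    std::unique_ptr<jit::JitCodeCache> cache(jit::JitCodeCache::Create(
        jit::JitCodeCache::kInitialCapacity,  // 16 KB debug / 64 KB release
        jit::JitCodeCache::kMaxCapacity,      // 64 MB
        &error_msg));
    if (cache == nullptr) {
      LOG(ERROR) << "Failed to create JIT code cache: " << error_msg;
    }
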
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index ce972ef..804d69f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/time_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -44,79 +45,106 @@
     }                                                       \
   } while (false)                                           \
 
-JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) {
-  CHECK_GT(capacity, 0U);
-  CHECK_LT(capacity, kMaxCapacity);
+JitCodeCache* JitCodeCache::Create(size_t initial_capacity,
+                                   size_t max_capacity,
+                                   std::string* error_msg) {
+  CHECK_GE(max_capacity, initial_capacity);
+  // We need to have 32 bit offsets from method headers in code cache which point to things
+  // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
+  // Ensure we're below 1 GB to be safe.
+  if (max_capacity > 1 * GB) {
+    std::ostringstream oss;
+    oss << "Maxium code cache capacity is limited to 1 GB, "
+        << PrettySize(max_capacity) << " is too big";
+    *error_msg = oss.str();
+    return nullptr;
+  }
+
   std::string error_str;
   // Map name specific for android_os_Debug.cpp accounting.
   MemMap* data_map = MemMap::MapAnonymous(
-    "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str);
+    "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str);
   if (data_map == nullptr) {
     std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
 
+  // Round both capacities down to a multiple of two pages, so that the code and data
+  // halves are each page-aligned (pages being the unit mspaces use).
+  initial_capacity = RoundDown(initial_capacity, 2 * kPageSize);
+  max_capacity = RoundDown(max_capacity, 2 * kPageSize);
+
   // Data cache is 1 / 2 of the map.
   // TODO: Make this variable?
-  size_t data_size = RoundUp(data_map->Size() / 2, kPageSize);
-  size_t code_size = data_map->Size() - data_size;
+  size_t data_size = max_capacity / 2;
+  size_t code_size = max_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, max_capacity);
   uint8_t* divider = data_map->Begin() + data_size;
 
-  // We need to have 32 bit offsets from method headers in code cache which point to things
-  // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work.
   MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str);
   if (code_map == nullptr) {
     std::ostringstream oss;
-    oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity;
+    oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity;
     *error_msg = oss.str();
     return nullptr;
   }
-  DCHECK_EQ(code_map->Size(), code_size);
   DCHECK_EQ(code_map->Begin(), divider);
-  return new JitCodeCache(code_map, data_map);
+  data_size = initial_capacity / 2;
+  code_size = initial_capacity - data_size;
+  DCHECK_EQ(code_size + data_size, initial_capacity);
+  return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity);
 }
 
-JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map)
+JitCodeCache::JitCodeCache(MemMap* code_map,
+                           MemMap* data_map,
+                           size_t initial_code_capacity,
+                           size_t initial_data_capacity,
+                           size_t max_capacity)
     : lock_("Jit code cache", kJitCodeCacheLock),
       lock_cond_("Jit code cache variable", lock_),
       collection_in_progress_(false),
       code_map_(code_map),
-      data_map_(data_map) {
+      data_map_(data_map),
+      max_capacity_(max_capacity),
+      current_capacity_(initial_code_capacity + initial_data_capacity),
+      code_end_(initial_code_capacity),
+      data_end_(initial_data_capacity),
+      has_done_one_collection_(false),
+      last_update_time_ns_(0) {
 
-  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/);
-  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/);
+  code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
+  data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
 
   if (code_mspace_ == nullptr || data_mspace_ == nullptr) {
     PLOG(FATAL) << "create_mspace_with_base failed";
   }
 
-  // Prevent morecore requests from the mspace.
-  mspace_set_footprint_limit(code_mspace_, code_map_->Size());
-  mspace_set_footprint_limit(data_mspace_, data_map_->Size());
+  SetFootprintLimit(current_capacity_);
 
   CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
   CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData);
 
-  live_bitmap_.reset(CodeCacheBitmap::Create("code-cache-bitmap",
-                                             reinterpret_cast<uintptr_t>(code_map_->Begin()),
-                                             reinterpret_cast<uintptr_t>(code_map_->End())));
-
-  if (live_bitmap_.get() == nullptr) {
-    PLOG(FATAL) << "creating bitmaps for the JIT code cache failed";
-  }
-
-  VLOG(jit) << "Created jit code cache: data size="
-            << PrettySize(data_map_->Size())
-            << ", code size="
-            << PrettySize(code_map_->Size());
+  VLOG(jit) << "Created jit code cache: initial data size="
+            << PrettySize(initial_data_capacity)
+            << ", initial code size="
+            << PrettySize(initial_code_capacity);
 }
 
 bool JitCodeCache::ContainsPc(const void* ptr) const {
   return code_map_->Begin() <= ptr && ptr < code_map_->End();
 }
 
+bool JitCodeCache::ContainsMethod(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto& it : method_code_map_) {
+    if (it.second == method) {
+      return true;
+    }
+  }
+  return false;
+}
+
 class ScopedCodeCacheWrite {
  public:
   explicit ScopedCodeCacheWrite(MemMap* code_map) : code_map_(code_map) {
@@ -248,45 +276,65 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
-
-  ScopedThreadSuspension sts(self, kSuspended);
-  MutexLock mu(self, lock_);
-  WaitForPotentialCollectionToComplete(self);
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    uint8_t* result = reinterpret_cast<uint8_t*>(
-        mspace_memalign(code_mspace_, alignment, total_size));
-    if (result == nullptr) {
-      return nullptr;
-    }
-    code_ptr = result + header_size;
-    DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      uint8_t* result = reinterpret_cast<uint8_t*>(
+          mspace_memalign(code_mspace_, alignment, total_size));
+      if (result == nullptr) {
+        return nullptr;
+      }
+      code_ptr = result + header_size;
+      DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
 
-    std::copy(code, code + code_size, code_ptr);
-    method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-    new (method_header) OatQuickMethodHeader(
-        (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
-        (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
-        (gc_map == nullptr) ? 0 : code_ptr - gc_map,
-        frame_size_in_bytes,
-        core_spill_mask,
-        fp_spill_mask,
-        code_size);
+      std::copy(code, code + code_size, code_ptr);
+      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      new (method_header) OatQuickMethodHeader(
+          (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
+          (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+          (gc_map == nullptr) ? 0 : code_ptr - gc_map,
+          frame_size_in_bytes,
+          core_spill_mask,
+          fp_spill_mask,
+          code_size);
+    }
+
+    __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
+                            reinterpret_cast<char*>(code_ptr + code_size));
+  }
+  // We need to update the entry point in the runnable state for the instrumentation.
+  {
+    MutexLock mu(self, lock_);
+    method_code_map_.Put(code_ptr, method);
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, method_header->GetEntryPoint());
+    if (collection_in_progress_) {
+      // We need to update the live bitmap if there is a GC to ensure it sees this new
+      // code.
+      GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+    }
+    last_update_time_ns_ = NanoTime();
+    VLOG(jit)
+        << "JIT added "
+        << PrettyMethod(method) << "@" << method
+        << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
+        << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
   }
 
-  __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
-                          reinterpret_cast<char*>(code_ptr + code_size));
-  method_code_map_.Put(code_ptr, method);
-  // We have checked there was no collection in progress earlier. If we
-  // were, setting the entry point of a method would be unsafe, as the collection
-  // could delete it.
-  DCHECK(!collection_in_progress_);
-  method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
+  return CodeCacheSizeLocked();
+}
+
+size_t JitCodeCache::CodeCacheSizeLocked() {
   size_t bytes_allocated = 0;
   mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
   return bytes_allocated;
@@ -294,6 +342,10 @@
 
 size_t JitCodeCache::DataCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
+  return DataCacheSizeLocked();
+}
+
+size_t JitCodeCache::DataCacheSizeLocked() {
   size_t bytes_allocated = 0;
   mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
   return bytes_allocated;
@@ -304,6 +356,11 @@
   return method_code_map_.size();
 }
 
+void JitCodeCache::ClearData(Thread* self, void* data) {
+  MutexLock mu(self, lock_);
+  mspace_free(data_mspace_, data);
+}
+
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
   size = RoundUp(size, sizeof(void*));
   uint8_t* result = nullptr;
@@ -370,6 +427,23 @@
     DCHECK(thread == Thread::Current() || thread->IsSuspended());
     MarkCodeVisitor visitor(thread, code_cache_);
     visitor.WalkStack();
+    if (kIsDebugBuild) {
+      // The stack walking code queries the side instrumentation stack if it
+      // sees an instrumentation exit pc, so the JIT code of methods in that stack
+      // must have been seen. We sanity check this below.
+      for (const instrumentation::InstrumentationStackFrame& frame
+              : *thread->GetInstrumentationStack()) {
+        // The 'method_' in InstrumentationStackFrame is the one that has return_pc_ in
+        // its stack frame; it is not the method owning return_pc_. We just pass null to
+        // LookupMethodHeader: the method is only checked against in debug builds.
+        OatQuickMethodHeader* method_header =
+            code_cache_->LookupMethodHeader(frame.return_pc_, nullptr);
+        if (method_header != nullptr) {
+          const void* code = method_header->GetCode();
+          CHECK(code_cache_->GetLiveBitmap()->Test(FromCodeToAllocation(code)));
+        }
+      }
+    }
     barrier_->Pass(Thread::Current());
   }
 
@@ -378,27 +452,88 @@
   Barrier* const barrier_;
 };
 
+void JitCodeCache::NotifyCollectionDone(Thread* self) {
+  collection_in_progress_ = false;
+  lock_cond_.Broadcast(self);
+}
+
+void JitCodeCache::SetFootprintLimit(size_t new_footprint) {
+  size_t per_space_footprint = new_footprint / 2;
+  DCHECK(IsAlignedParam(per_space_footprint, kPageSize));
+  DCHECK_EQ(per_space_footprint * 2, new_footprint);
+  mspace_set_footprint_limit(data_mspace_, per_space_footprint);
+  {
+    ScopedCodeCacheWrite scc(code_map_.get());
+    mspace_set_footprint_limit(code_mspace_, per_space_footprint);
+  }
+}
+
+bool JitCodeCache::IncreaseCodeCacheCapacity() {
+  if (current_capacity_ == max_capacity_) {
+    return false;
+  }
+
+  // Double the capacity if we're below 1MB, or increase it by 1MB if
+  // we're above.
+  if (current_capacity_ < 1 * MB) {
+    current_capacity_ *= 2;
+  } else {
+    current_capacity_ += 1 * MB;
+  }
+  if (current_capacity_ > max_capacity_) {
+    current_capacity_ = max_capacity_;
+  }
+
+  if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
+    LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_);
+  }
+
+  SetFootprintLimit(current_capacity_);
+
+  return true;
+}
+
 void JitCodeCache::GarbageCollectCache(Thread* self) {
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+
+  // Wait for an existing collection, or let everyone know we are starting one.
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    if (WaitForPotentialCollectionToComplete(self)) {
+      return;
+    } else {
+      collection_in_progress_ = true;
+    }
+  }
+
+  // Check if we just need to grow the capacity. If we don't, allocate the bitmap while
+  // we hold the lock.
+  {
+    MutexLock mu(self, lock_);
+    if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) {
+      has_done_one_collection_ = false;
+      NotifyCollectionDone(self);
+      return;
+    } else {
+      live_bitmap_.reset(CodeCacheBitmap::Create(
+          "code-cache-bitmap",
+          reinterpret_cast<uintptr_t>(code_map_->Begin()),
+          reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2)));
+    }
+  }
+
   if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
     LOG(INFO) << "Clearing code cache, code="
               << PrettySize(CodeCacheSize())
               << ", data=" << PrettySize(DataCacheSize());
   }
-
-  size_t map_size = 0;
-  ScopedThreadSuspension sts(self, kSuspended);
-
   // Walk over all compiled methods and set the entry points of these
   // methods to interpreter.
   {
     MutexLock mu(self, lock_);
-    if (WaitForPotentialCollectionToComplete(self)) {
-      return;
-    }
-    collection_in_progress_ = true;
-    map_size = method_code_map_.size();
     for (auto& it : method_code_map_) {
-      it.second->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+      instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
       info->GetMethod()->SetProfilingInfo(nullptr);
@@ -409,16 +544,12 @@
   {
     Barrier barrier(0);
     size_t threads_running_checkpoint = 0;
-    {
-      // Walking the stack requires the mutator lock.
-      // We only take the lock when running the checkpoint and not waiting so that
-      // when we go back to suspended, we can execute checkpoints that were requested
-      // concurrently, and then move to waiting for our own checkpoint to finish.
-      ScopedObjectAccess soa(self);
-      MarkCodeClosure closure(this, &barrier);
-      threads_running_checkpoint =
-          Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
-    }
+    MarkCodeClosure closure(this, &barrier);
+    threads_running_checkpoint =
+        Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+    // Now that we have run our checkpoint, move to a suspended state and wait
+    // for other threads to run the checkpoint.
+    ScopedThreadSuspension sts(self, kSuspended);
     if (threads_running_checkpoint != 0) {
       barrier.Increment(self, threads_running_checkpoint);
     }
@@ -426,7 +557,6 @@
 
   {
     MutexLock mu(self, lock_);
-    DCHECK_EQ(map_size, method_code_map_.size());
     // Free unused compiled code, and restore the entry point of used compiled code.
     {
       ScopedCodeCacheWrite scc(code_map_.get());
@@ -436,7 +566,7 @@
         uintptr_t allocation = FromCodeToAllocation(code_ptr);
         const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
         if (GetLiveBitmap()->Test(allocation)) {
-          method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+          instrumentation->UpdateMethodsCode(method, method_header->GetEntryPoint());
           ++it;
         } else {
           method->ClearCounter();
@@ -446,7 +576,6 @@
         }
       }
     }
-    GetLiveBitmap()->Bitmap::Clear();
 
     // Free all profiling info.
     for (ProfilingInfo* info : profiling_infos_) {
@@ -455,8 +584,9 @@
     }
     profiling_infos_.clear();
 
-    collection_in_progress_ = false;
-    lock_cond_.Broadcast(self);
+    live_bitmap_.reset(nullptr);
+    has_done_one_collection_ = true;
+    NotifyCollectionDone(self);
   }
 
   if (!kIsDebugBuild || VLOG_IS_ON(jit)) {
@@ -489,8 +619,10 @@
   if (!method_header->Contains(pc)) {
     return nullptr;
   }
-  DCHECK_EQ(it->second, method)
-      << PrettyMethod(method) << " " << PrettyMethod(it->second) << " " << std::hex << pc;
+  if (kIsDebugBuild && method != nullptr) {
+    DCHECK_EQ(it->second, method)
+        << PrettyMethod(method) << " " << PrettyMethod(it->second) << " " << std::hex << pc;
+  }
   return method_header;
 }
 
@@ -533,5 +665,34 @@
   return info;
 }
 
+// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock
+// is already held.
+void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS {
+  if (code_mspace_ == mspace) {
+    size_t result = code_end_;
+    code_end_ += increment;
+    return reinterpret_cast<void*>(result + code_map_->Begin());
+  } else {
+    DCHECK_EQ(data_mspace_, mspace);
+    size_t result = data_end_;
+    data_end_ += increment;
+    return reinterpret_cast<void*>(result + data_map_->Begin());
+  }
+}
+
+void JitCodeCache::GetCompiledArtMethods(const OatFile* oat_file,
+                                         std::set<ArtMethod*>& methods) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto it : method_code_map_) {
+    if (it.second->GetDexFile()->GetOatDexFile()->GetOatFile() == oat_file) {
+      methods.insert(it.second);
+    }
+  }
+}
+
+uint64_t JitCodeCache::GetLastUpdateTimeNs() {
+  MutexLock mu(Thread::Current(), lock_);
+  return last_update_time_ns_;
+}
 }  // namespace jit
 }  // namespace art
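
Note: a worked example of the IncreaseCodeCacheCapacity policy above. Starting
from the 64 KB release default with a 64 MB ceiling, each collection that still
finds the cache full grows it as:

    64 KB -> 128 KB -> 256 KB -> 512 KB -> 1 MB -> 2 MB -> 3 MB -> ... -> 64 MB

i.e. doubling while below 1 MB, then 1 MB per step, clamped at max_capacity_.
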
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e10f962..acd7c62 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -41,20 +41,20 @@
 
 class JitInstrumentationCache;
 
-// Alignment that will suit all architectures.
+// Alignment in bytes that will suit all architectures.
 static constexpr int kJitCodeAlignment = 16;
 using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>;
 
 class JitCodeCache {
  public:
-  static constexpr size_t kMaxCapacity = 1 * GB;
+  static constexpr size_t kMaxCapacity = 64 * MB;
   // Put the default to a very low amount for debug builds to stress the code cache
   // collection.
-  static constexpr size_t kDefaultCapacity = kIsDebugBuild ? 20 * KB : 2 * MB;
+  static constexpr size_t kInitialCapacity = kIsDebugBuild ? 16 * KB : 64 * KB;
 
   // Create the code cache with a code + data capacity equal to "capacity", error message is passed
   // in the out arg error_msg.
-  static JitCodeCache* Create(size_t capacity, std::string* error_msg);
+  static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSize() REQUIRES(!lock_);
@@ -83,11 +83,19 @@
   // Return true if the code cache contains this pc.
   bool ContainsPc(const void* pc) const;
 
+  // Return true if the code cache contains this method.
+  bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
+
   // Reserve a region of data of size at least "size". Returns null if there is no more room.
   uint8_t* ReserveData(Thread* self, size_t size)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
+  // Clear data from the data portion of the code cache.
+  void ClearData(Thread* self, void* data)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
@@ -125,9 +133,26 @@
       REQUIRES(!lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS {
+    return mspace == code_mspace_ || mspace == data_mspace_;
+  }
+
+  void* MoreCore(const void* mspace, intptr_t increment);
+
+  // Adds to `methods` all the compiled ArtMethods which are part of the given `oat_file`.
+  void GetCompiledArtMethods(const OatFile* oat_file, std::set<ArtMethod*>& methods)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_);
+
  private:
-  // Take ownership of code_mem_map.
-  JitCodeCache(MemMap* code_map, MemMap* data_map);
+  // Take ownership of maps.
+  JitCodeCache(MemMap* code_map,
+               MemMap* data_map,
+               size_t initial_code_capacity,
+               size_t initial_data_capacity,
+               size_t max_capacity);
 
   // Internal version of 'CommitCode' that will not retry if the
   // allocation fails. Return null if the allocation fails.
@@ -158,6 +183,22 @@
   // Free in the mspace allocations taken by 'method'.
   void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
 
+  // Number of bytes allocated in the code cache.
+  size_t CodeCacheSizeLocked() REQUIRES(lock_);
+
+  // Number of bytes allocated in the data cache.
+  size_t DataCacheSizeLocked() REQUIRES(lock_);
+
+  // Notify all waiting threads that a collection is done.
+  void NotifyCollectionDone(Thread* self) REQUIRES(lock_);
+
+  // Try to increase the current capacity of the code cache. Return whether we
+  // succeeded at doing so.
+  bool IncreaseCodeCacheCapacity() REQUIRES(lock_);
+
+  // Set the footprint limit of the code cache.
+  void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_);
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
@@ -179,6 +220,24 @@
   // ProfilingInfo objects we have allocated.
   std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
 
+  // The maximum capacity in bytes this code cache can go to.
+  size_t max_capacity_ GUARDED_BY(lock_);
+
+  // The current capacity in bytes of the code cache.
+  size_t current_capacity_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the code portion of the code cache.
+  size_t code_end_ GUARDED_BY(lock_);
+
+  // The current footprint in bytes of the data portion of the code cache.
+  size_t data_end_ GUARDED_BY(lock_);
+
+  // Whether a collection has already been done on the current capacity.
+  bool has_done_one_collection_ GUARDED_BY(lock_);
+
+  // Last time the code cache was updated.
+  uint64_t last_update_time_ns_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
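
Note: OwnsSpace and MoreCore exist so dlmalloc's morecore callback can route
growth requests for the two JIT mspaces back into the cache, which hands out
address space from the reserved maps instead of mmap'ing fresh memory. A sketch
of the expected glue (the real dispatch lives in the dlmalloc glue code; the
function name is assumed):

    void* art_heap_morecore(void* mspace, intptr_t increment) {
      jit::Jit* jit = Runtime::Current()->GetJit();
      if (jit != nullptr && jit->GetCodeCache()->OwnsSpace(mspace)) {
        return jit->GetCodeCache()->MoreCore(mspace, increment);
      }
      return nullptr;  // Not a JIT mspace; the regular heap path handles it.
    }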
 
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 7931306..6531325 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -20,6 +20,7 @@
 #include "jit.h"
 #include "jit_code_cache.h"
 #include "scoped_thread_state_change.h"
+#include "thread_list.h"
 
 namespace art {
 namespace jit {
@@ -73,16 +74,48 @@
 JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
                                                  size_t warm_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
-      warm_method_threshold_(warm_method_threshold) {
+      warm_method_threshold_(warm_method_threshold),
+      listener_(this) {
 }
 
 void JitInstrumentationCache::CreateThreadPool() {
+  // Create the thread pool before setting the instrumentation, so that
+  // when the threads stopped being suspended, they can use it directly.
+  // There is a DCHECK in the 'AddSamples' method to ensure the tread pool
+  // is not null when we instrument.
   thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
+  thread_pool_->StartWorkers(Thread::Current());
+  {
+    // Add Jit interpreter instrumentation, tells the interpreter when
+    // to notify the jit to compile something.
+    ScopedSuspendAll ssa(__FUNCTION__);
+    Runtime::Current()->GetInstrumentation()->AddListener(
+        &listener_, JitInstrumentationListener::kJitEvents);
+  }
 }
 
-void JitInstrumentationCache::DeleteThreadPool() {
-  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
-  thread_pool_.reset();
+void JitInstrumentationCache::DeleteThreadPool(Thread* self) {
+  DCHECK(Runtime::Current()->IsShuttingDown(self));
+  if (thread_pool_ != nullptr) {
+    // First remove the listener, to avoid having mutators enter
+    // 'AddSamples'.
+    ThreadPool* cache = nullptr;
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      Runtime::Current()->GetInstrumentation()->RemoveListener(
+          &listener_, JitInstrumentationListener::kJitEvents);
+      // Clear thread_pool_ field while the threads are suspended.
+      // A mutator in the 'AddSamples' method will null-check it.
+      cache = thread_pool_.release();
+    }
+    cache->StopWorkers(self);
+    cache->RemoveAllTasks(self);
+    // We could just suspend all threads, but we know those threads
+    // will finish in a short period, so it's not worth adding suspend logic
+    // here. Besides, this is only done for shutdown.
+    cache->Wait(self, false, false);
+    delete cache;
+  }
 }
 
 void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
@@ -91,25 +124,32 @@
   if (method->IsClassInitializer() || method->IsNative()) {
     return;
   }
-  if (thread_pool_.get() == nullptr) {
-    DCHECK(Runtime::Current()->IsShuttingDown(self));
-    return;
-  }
+  DCHECK(thread_pool_ != nullptr);
+
   uint16_t sample_count = method->IncrementCounter();
   if (sample_count == warm_method_threshold_) {
-    if (ProfilingInfo::Create(self, method, /* retry_allocation */ false)) {
+    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+    if (success) {
       VLOG(jit) << "Start profiling " << PrettyMethod(method);
-    } else {
+    }
+
+    if (thread_pool_ == nullptr) {
+      // Calling ProfilingInfo::Create might put us in a suspended state, which could
+      // lead to the thread pool being deleted when we are shutting down.
+      DCHECK(Runtime::Current()->IsShuttingDown(self));
+      return;
+    }
+
+    if (!success) {
       // We failed allocating. Instead of doing the collection on the Java thread, we push
       // an allocation to a compiler thread, that will do the collection.
       thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
-      thread_pool_->StartWorkers(self);
     }
   }
 
   if (sample_count == hot_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
     thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-    thread_pool_->StartWorkers(self);
   }
 }
 
@@ -118,6 +158,20 @@
   CHECK(instrumentation_cache_ != nullptr);
 }
 
+void JitInstrumentationListener::MethodEntered(Thread* thread,
+                                               mirror::Object* /*this_object*/,
+                                               ArtMethod* method,
+                                               uint32_t /*dex_pc*/) {
+  instrumentation_cache_->AddSamples(thread, method, 1);
+}
+
+void JitInstrumentationListener::BackwardBranch(Thread* thread,
+                                                ArtMethod* method,
+                                                int32_t dex_pc_offset) {
+  CHECK_LE(dex_pc_offset, 0);
+  instrumentation_cache_->AddSamples(thread, method, 1);
+}
+
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
                                                           mirror::Object* this_object,
                                                           ArtMethod* caller,
@@ -138,7 +192,9 @@
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
-  thread_pool_->Wait(self, false, false);
+  if (thread_pool_ != nullptr) {
+    thread_pool_->Wait(self, false, false);
+  }
 }
 
 }  // namespace jit
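
Note: owning the listener by value (see the header change below) is what makes
setup and teardown symmetric: both sides pass the same pointer, where the old
code registered a heap-allocated listener it could never remove.

    // Under ScopedSuspendAll in both directions:
    instrumentation->AddListener(&listener_, JitInstrumentationListener::kJitEvents);
    // ... later, at shutdown ...
    instrumentation->RemoveListener(&listener_, JitInstrumentationListener::kJitEvents);
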
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 9eb464b..1f96d59 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -31,7 +31,6 @@
 
 namespace art {
 namespace mirror {
-  class Class;
   class Object;
   class Throwable;
 }  // namespace mirror
@@ -42,24 +41,7 @@
 
 namespace jit {
 
-// Keeps track of which methods are hot.
-class JitInstrumentationCache {
- public:
-  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateThreadPool();
-  void DeleteThreadPool();
-  // Wait until there is no more pending compilation tasks.
-  void WaitForCompilationToFinish(Thread* self);
-
- private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  std::unique_ptr<ThreadPool> thread_pool_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
-};
+class JitInstrumentationCache;
 
 class JitInstrumentationListener : public instrumentation::InstrumentationListener {
  public:
@@ -67,9 +49,8 @@
 
   void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
                      ArtMethod* method, uint32_t /*dex_pc*/)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
   void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
                     ArtMethod* /*method*/, uint32_t /*dex_pc*/,
                     const JValue& /*return_value*/)
@@ -90,10 +71,7 @@
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
   void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK_LE(dex_pc_offset, 0);
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
@@ -102,12 +80,37 @@
                                 ArtMethod* callee)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static constexpr uint32_t kJitEvents =
+      instrumentation::Instrumentation::kMethodEntered |
+      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kInvokeVirtualOrInterface;
+
  private:
   JitInstrumentationCache* const instrumentation_cache_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
 };
 
+// Keeps track of which methods are hot.
+class JitInstrumentationCache {
+ public:
+  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
+  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void CreateThreadPool();
+  void DeleteThreadPool(Thread* self);
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
+
+ private:
+  size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
+  JitInstrumentationListener listener_;
+  std::unique_ptr<ThreadPool> thread_pool_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
+};
+
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
new file mode 100644
index 0000000..4450653
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offline_profiling_info.h"
+
+#include <fstream>
+#include <set>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "art_method-inl.h"
+#include "base/mutex.h"
+#include "jit/profiling_info.h"
+#include "safe_map.h"
+#include "utils.h"
+
+namespace art {
+
+// An arbitrary value to throttle save requests. Set to 500ms for now.
+static constexpr uint64_t kMillisecondsToNanos = 1000000;
+static constexpr uint64_t kMinimumTimeBetweenSavesNs = 500 * kMillisecondsToNanos;
+
+bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const {
+  return last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs;
+}
+
+void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename,
+                                             uint64_t last_update_time_ns,
+                                             const std::set<ArtMethod*>& methods) {
+  if (!NeedsSaving(last_update_time_ns)) {
+    VLOG(profiler) << "No need to saved profile info to " << filename;
+    return;
+  }
+
+  if (methods.empty()) {
+    VLOG(profiler) << "No info to save to " << filename;
+    return;
+  }
+
+  DexFileToMethodsMap info;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    for (auto it = methods.begin(); it != methods.end(); it++) {
+      AddMethodInfo(*it, &info);
+    }
+  }
+
+  // This doesn't need locking because we are trying to lock the file for exclusive
+  // access and fail immediately if we can't.
+  if (Serialize(filename, info)) {
+    last_update_time_ns_.StoreRelaxed(last_update_time_ns);
+    VLOG(profiler) << "Successfully saved profile info to "
+                   << filename << " with time stamp: " << last_update_time_ns;
+  }
+}
+
+void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
+  DCHECK(method != nullptr);
+  const DexFile* dex_file = method->GetDexFile();
+
+  auto info_it = info->find(dex_file);
+  if (info_it == info->end()) {
+    info_it = info->Put(dex_file, std::set<uint32_t>());
+  }
+  info_it->second.insert(method->GetDexMethodIndex());
+}
+
+static int OpenOrCreateFile(const std::string& filename) {
+  // TODO(calin) allow the shared uid of the app to access the file.
+  int fd = open(filename.c_str(),
+                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    PLOG(WARNING) << "Failed to open profile file " << filename;
+    return -1;
+  }
+
+  // Lock the file for exclusive access but don't wait if we can't lock it.
+  int err = flock(fd, LOCK_EX | LOCK_NB);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to lock profile file " << filename;
+    return -1;
+  }
+
+  return fd;
+}
+
+static bool CloseDescriptorForFile(int fd, const std::string& filename) {
+  // Now unlock the file, allowing another process in.
+  int err = flock(fd, LOCK_UN);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to unlock profile file " << filename;
+    return false;
+  }
+
+  // Done, close the file.
+  err = ::close(fd);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
+    return false;
+  }
+
+  return true;
+}
+
+static void WriteToFile(int fd, const std::ostringstream& os) {
+  std::string data(os.str());
+  const char *p = data.c_str();
+  size_t length = data.length();
+  do {
+    ssize_t n = TEMP_FAILURE_RETRY(::write(fd, p, length));
+    if (n < 0) {
+      // Bail out on a real write error; retrying blindly here could loop forever.
+      PLOG(WARNING) << "Failed to write profile data";
+      return;
+    }
+    p += n;
+    length -= n;
+  } while (length > 0);
+}
+
+static constexpr char kFieldSeparator = ',';
+static constexpr char kLineSeparator = '\n';
+
+/**
+ * Serialization format:
+ *    multidex_suffix1,dex_location_checksum1,method_id11,method_id12...
+ *    multidex_suffix2,dex_location_checksum2,method_id21,method_id22...
+ * e.g.
+ *    ,131232145,11,23,454,54               -> this is the first dex file, it has no multidex suffix
+ *    :classes5.dex,218490184,39,13,49,1    -> this is the fifth dex file.
+ **/
+bool OfflineProfilingInfo::Serialize(const std::string& filename,
+                                     const DexFileToMethodsMap& info) const {
+  int fd = OpenOrCreateFile(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  // TODO(calin): Merge with a previous existing profile.
+  // TODO(calin): Profile this and see how much memory it takes. If too much,
+  // write to file directly.
+  std::ostringstream os;
+  for (const auto& it : info) {
+    const DexFile* dex_file = it.first;
+    const std::set<uint32_t>& method_dex_ids = it.second;
+
+    os << DexFile::GetMultiDexSuffix(dex_file->GetLocation())
+        << kFieldSeparator
+        << dex_file->GetLocationChecksum();
+    for (auto method_it : method_dex_ids) {
+      os << kFieldSeparator << method_it;
+    }
+    os << kLineSeparator;
+  }
+
+  WriteToFile(fd, os);
+
+  return CloseDescriptorForFile(fd, filename);
+}
+}  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
new file mode 100644
index 0000000..e3117eb
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+
+#include <set>
+
+#include "atomic.h"
+#include "dex_file.h"
+#include "safe_map.h"
+
+namespace art {
+
+class ArtMethod;
+
+/**
+ * Profiling information in a format that can be serialized to disk.
+ * It is a serialize-friendly format based on information collected
+ * by the interpreter (ProfilingInfo).
+ * Currently it stores only the hot compiled methods.
+ */
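+//
+// Illustrative usage (a sketch; the profile file name, timestamp and hot-method set
+// are assumed to be provided by the caller):
+//   OfflineProfilingInfo offline_info;
+//   offline_info.SaveProfilingInfo(profile_file_name, NanoTime(), hot_methods);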
+class OfflineProfilingInfo {
+ public:
+  bool NeedsSaving(uint64_t last_update_time_ns) const;
+  void SaveProfilingInfo(const std::string& filename,
+                         uint64_t last_update_time_ns,
+                         const std::set<ArtMethod*>& methods);
+
+ private:
+  // Map identifying the location of the profiled methods.
+  // dex_file_ -> [dex_method_index]+
+  using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>;
+
+  void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
+
+  // TODO(calin): Verify if Atomic is really needed (are we sure this is only called
+  // from a single thread?)
+  Atomic<uint64_t> last_update_time_ns_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 234a733..5e3fa19 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1670,7 +1670,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
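+    // Written as 'length > s->GetLength() - start' rather than
+    // 'start + length > s->GetLength()' so that the addition cannot overflow jint.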
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -1684,12 +1684,13 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
       const jchar* chars = s->GetValue();
-      ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
+      size_t bytes = CountUtf8Bytes(chars + start, length);
+      ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length);
     }
   }
 
@@ -1772,7 +1773,7 @@
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
     const uint16_t* chars = s->GetValue();
-    ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
+    ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
   }
@@ -2473,7 +2474,7 @@
                                                               "GetPrimitiveArrayRegion",
                                                               "get region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
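+      // Written as 'length > array->GetLength() - start' so that 'start + length'
+      // cannot overflow jint.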
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "src");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2493,7 +2494,7 @@
                                                               "SetPrimitiveArrayRegion",
                                                               "set region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "dst");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 41b368e..649df5f 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1077,6 +1077,12 @@
   env_->set_region_fn(a, size - 1, size, nullptr); \
   ExpectException(aioobe_); \
   \
+  /* Regression test against integer overflow in range check. */ \
+  env_->get_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  \
   /* It's okay for the buffer to be null as long as the length is 0. */ \
   env_->get_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
@@ -1507,6 +1513,9 @@
   ExpectException(sioobe_);
   env_->GetStringRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   jchar chars[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringRegion(s, 1, 2, &chars[1]);
@@ -1529,6 +1538,9 @@
   ExpectException(sioobe_);
   env_->GetStringUTFRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringUTFRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   char bytes[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringUTFRegion(s, 1, 2, &bytes[1]);
diff --git a/runtime/lambda/box_table.cc b/runtime/lambda/box_table.cc
index 8eef10b..9918bb7 100644
--- a/runtime/lambda/box_table.cc
+++ b/runtime/lambda/box_table.cc
@@ -62,7 +62,7 @@
 
 BoxTable::~BoxTable() {
   // Free all the copies of our closures.
-  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ++map_iterator) {
+  for (auto map_iterator = map_.begin(); map_iterator != map_.end(); ) {
     std::pair<UnorderedMapKeyType, ValueType>& key_value_pair = *map_iterator;
 
     Closure* closure = key_value_pair.first;
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 2d3581d..e133847 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -154,8 +154,10 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
@@ -164,9 +166,11 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -239,22 +243,27 @@
   std::string error_detail;
   CheckNonOverlapping(expected, limit, &error_detail);
 
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
 #if USE_ART_LOW_4G_ALLOCATOR
-static inline void* TryMemMapLow4GB(void* ptr, size_t page_aligned_byte_count, int prot, int flags,
-                                    int fd) {
-  void* actual = mmap(ptr, page_aligned_byte_count, prot, flags, fd, 0);
+static inline void* TryMemMapLow4GB(void* ptr,
+                                    size_t page_aligned_byte_count,
+                                    int prot,
+                                    int flags,
+                                    int fd,
+                                    off_t offset) {
+  void* actual = mmap(ptr, page_aligned_byte_count, prot, flags, fd, offset);
   if (actual != MAP_FAILED) {
     // Since we didn't use MAP_FIXED the kernel may have mapped it somewhere not in the low
     // 4GB. If this is the case, unmap and retry.
@@ -267,8 +276,13 @@
 }
 #endif
 
-MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byte_count, int prot,
-                             bool low_4gb, bool reuse, std::string* error_msg) {
+MemMap* MemMap::MapAnonymous(const char* name,
+                             uint8_t* expected_ptr,
+                             size_t byte_count,
+                             int prot,
+                             bool low_4gb,
+                             bool reuse,
+                             std::string* error_msg) {
 #ifndef __LP64__
   UNUSED(low_4gb);
 #endif
@@ -317,122 +331,14 @@
   // We need to store and potentially set an error number for pretty printing of errors
   int saved_errno = 0;
 
-#ifdef __LP64__
-  // When requesting low_4g memory and having an expectation, the requested range should fit into
-  // 4GB.
-  if (low_4gb && (
-      // Start out of bounds.
-      (reinterpret_cast<uintptr_t>(expected_ptr) >> 32) != 0 ||
-      // End out of bounds. For simplicity, this will fail for the last page of memory.
-      (reinterpret_cast<uintptr_t>(expected_ptr + page_aligned_byte_count) >> 32) != 0)) {
-    *error_msg = StringPrintf("The requested address space (%p, %p) cannot fit in low_4gb",
-                              expected_ptr, expected_ptr + page_aligned_byte_count);
-    return nullptr;
-  }
-#endif
-
-  // TODO:
-  // A page allocator would be a useful abstraction here, as
-  // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
-  // 2) The linear scheme, even with simple saving of the last known position, is very crude
-#if USE_ART_LOW_4G_ALLOCATOR
-  // MAP_32BIT only available on x86_64.
-  void* actual = MAP_FAILED;
-  if (low_4gb && expected_ptr == nullptr) {
-    bool first_run = true;
-
-    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
-    for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) {
-      // Use maps_ as an optimization to skip over large maps.
-      // Find the first map which is address > ptr.
-      auto it = maps_->upper_bound(reinterpret_cast<void*>(ptr));
-      if (it != maps_->begin()) {
-        auto before_it = it;
-        --before_it;
-        // Start at the end of the map before the upper bound.
-        ptr = std::max(ptr, reinterpret_cast<uintptr_t>(before_it->second->BaseEnd()));
-        CHECK_ALIGNED(ptr, kPageSize);
-      }
-      while (it != maps_->end()) {
-        // How much space do we have until the next map?
-        size_t delta = reinterpret_cast<uintptr_t>(it->first) - ptr;
-        // If the space may be sufficient, break out of the loop.
-        if (delta >= page_aligned_byte_count) {
-          break;
-        }
-        // Otherwise, skip to the end of the map.
-        ptr = reinterpret_cast<uintptr_t>(it->second->BaseEnd());
-        CHECK_ALIGNED(ptr, kPageSize);
-        ++it;
-      }
-
-      // Try to see if we get lucky with this address since none of the ART maps overlap.
-      actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags,
-                               fd.get());
-      if (actual != MAP_FAILED) {
-        next_mem_pos_ = reinterpret_cast<uintptr_t>(actual) + page_aligned_byte_count;
-        break;
-      }
-
-      if (4U * GB - ptr < page_aligned_byte_count) {
-        // Not enough memory until 4GB.
-        if (first_run) {
-          // Try another time from the bottom;
-          ptr = LOW_MEM_START - kPageSize;
-          first_run = false;
-          continue;
-        } else {
-          // Second try failed.
-          break;
-        }
-      }
-
-      uintptr_t tail_ptr;
-
-      // Check pages are free.
-      bool safe = true;
-      for (tail_ptr = ptr; tail_ptr < ptr + page_aligned_byte_count; tail_ptr += kPageSize) {
-        if (msync(reinterpret_cast<void*>(tail_ptr), kPageSize, 0) == 0) {
-          safe = false;
-          break;
-        } else {
-          DCHECK_EQ(errno, ENOMEM);
-        }
-      }
-
-      next_mem_pos_ = tail_ptr;  // update early, as we break out when we found and mapped a region
-
-      if (safe == true) {
-        actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags,
-                                 fd.get());
-        if (actual != MAP_FAILED) {
-            break;
-        }
-      } else {
-        // Skip over last page.
-        ptr = tail_ptr;
-      }
-    }
-
-    if (actual == MAP_FAILED) {
-      LOG(ERROR) << "Could not find contiguous low-memory space.";
-      saved_errno = ENOMEM;
-    }
-  } else {
-    actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0);
-    saved_errno = errno;
-  }
-
-#else
-#if defined(__LP64__)
-  if (low_4gb && expected_ptr == nullptr) {
-    flags |= MAP_32BIT;
-  }
-#endif
-
-  void* actual = mmap(expected_ptr, page_aligned_byte_count, prot, flags, fd.get(), 0);
+  void* actual = MapInternal(expected_ptr,
+                             page_aligned_byte_count,
+                             prot,
+                             flags,
+                             fd.get(),
+                             0,
+                             low_4gb);
   saved_errno = errno;
-#endif
 
   if (actual == MAP_FAILED) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
@@ -458,8 +364,15 @@
   return new MemMap(name, addr, byte_count, addr, page_aligned_byte_count, 0, true /* reuse */);
 }
 
-MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int prot, int flags,
-                                 int fd, off_t start, bool reuse, const char* filename,
+MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr,
+                                 size_t byte_count,
+                                 int prot,
+                                 int flags,
+                                 int fd,
+                                 off_t start,
+                                 bool low_4gb,
+                                 bool reuse,
+                                 const char* filename,
                                  std::string* error_msg) {
   CHECK_NE(0, prot);
   CHECK_NE(0, flags & (MAP_SHARED | MAP_PRIVATE));
@@ -471,7 +384,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -498,22 +412,25 @@
     page_aligned_byte_count += redzone_size;
   }
 
-  uint8_t* actual = reinterpret_cast<uint8_t*>(mmap(page_aligned_expected,
-                                              page_aligned_byte_count,
-                                              prot,
-                                              flags,
-                                              fd,
-                                              page_aligned_offset));
+  uint8_t* actual = reinterpret_cast<uint8_t*>(MapInternal(page_aligned_expected,
+                                                           page_aligned_byte_count,
+                                                           prot,
+                                                           flags,
+                                                           fd,
+                                                           page_aligned_offset,
+                                                           low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
@@ -827,6 +744,132 @@
   size_ = new_size;
 }
 
+void* MemMap::MapInternal(void* addr,
+                          size_t length,
+                          int prot,
+                          int flags,
+                          int fd,
+                          off_t offset,
+                          bool low_4gb) {
+#ifdef __LP64__
+  // When requesting low_4g memory and having an expectation, the requested range should fit into
+  // 4GB.
+  if (low_4gb && (
+      // Start out of bounds.
+      (reinterpret_cast<uintptr_t>(addr) >> 32) != 0 ||
+      // End out of bounds. For simplicity, this will fail for the last page of memory.
+      ((reinterpret_cast<uintptr_t>(addr) + length) >> 32) != 0)) {
+    LOG(ERROR) << "The requested address space (" << addr << ", "
+               << reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(addr) + length)
+               << ") cannot fit in low_4gb";
+    return MAP_FAILED;
+  }
+#else
+  UNUSED(low_4gb);
+#endif
+  DCHECK_ALIGNED(length, kPageSize);
+  if (low_4gb) {
+    DCHECK_EQ(flags & MAP_FIXED, 0);
+  }
+  // TODO:
+  // A page allocator would be a useful abstraction here, as
+  // 1) It is doubtful that MAP_32BIT on x86_64 is doing the right job for us
+  void* actual = MAP_FAILED;
+#if USE_ART_LOW_4G_ALLOCATOR
+  // MAP_32BIT only available on x86_64.
+  if (low_4gb && addr == nullptr) {
+    bool first_run = true;
+
+    MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_);
+    for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) {
+      // Use maps_ as an optimization to skip over large maps.
+      // Find the first map which is address > ptr.
+      auto it = maps_->upper_bound(reinterpret_cast<void*>(ptr));
+      if (it != maps_->begin()) {
+        auto before_it = it;
+        --before_it;
+        // Start at the end of the map before the upper bound.
+        ptr = std::max(ptr, reinterpret_cast<uintptr_t>(before_it->second->BaseEnd()));
+        CHECK_ALIGNED(ptr, kPageSize);
+      }
+      while (it != maps_->end()) {
+        // How much space do we have until the next map?
+        size_t delta = reinterpret_cast<uintptr_t>(it->first) - ptr;
+        // If the space may be sufficient, break out of the loop.
+        if (delta >= length) {
+          break;
+        }
+        // Otherwise, skip to the end of the map.
+        ptr = reinterpret_cast<uintptr_t>(it->second->BaseEnd());
+        CHECK_ALIGNED(ptr, kPageSize);
+        ++it;
+      }
+
+      // Try to see if we get lucky with this address since none of the ART maps overlap.
+      actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), length, prot, flags, fd, offset);
+      if (actual != MAP_FAILED) {
+        next_mem_pos_ = reinterpret_cast<uintptr_t>(actual) + length;
+        return actual;
+      }
+
+      if (4U * GB - ptr < length) {
+        // Not enough memory until 4GB.
+        if (first_run) {
+          // Try another time from the bottom.
+          ptr = LOW_MEM_START - kPageSize;
+          first_run = false;
+          continue;
+        } else {
+          // Second try failed.
+          break;
+        }
+      }
+
+      uintptr_t tail_ptr;
+
+      // Check pages are free.
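+      // (msync on an unmapped page fails with ENOMEM, so success means the page is
+      // already in use.)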
+      bool safe = true;
+      for (tail_ptr = ptr; tail_ptr < ptr + length; tail_ptr += kPageSize) {
+        if (msync(reinterpret_cast<void*>(tail_ptr), kPageSize, 0) == 0) {
+          safe = false;
+          break;
+        } else {
+          DCHECK_EQ(errno, ENOMEM);
+        }
+      }
+
+      next_mem_pos_ = tail_ptr;  // Update early, as we break out when we have found and mapped a region.
+
+      if (safe) {
+        actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), length, prot, flags, fd, offset);
+        if (actual != MAP_FAILED) {
+          return actual;
+        }
+      } else {
+        // Skip over last page.
+        ptr = tail_ptr;
+      }
+    }
+
+    if (actual == MAP_FAILED) {
+      LOG(ERROR) << "Could not find contiguous low-memory space.";
+      errno = ENOMEM;
+    }
+  } else {
+    actual = mmap(addr, length, prot, flags, fd, offset);
+  }
+
+#else
+#if defined(__LP64__)
+  if (low_4gb && addr == nullptr) {
+    flags |= MAP_32BIT;
+  }
+#endif
+  actual = mmap(addr, length, prot, flags, fd, offset);
+#endif
+  return actual;
+}
+
 std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
   os << StringPrintf("[MemMap: %p-%p prot=0x%x %s]",
                      mem_map.BaseBegin(), mem_map.BaseEnd(), mem_map.GetProtect(),
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 7c11ceb..efce09a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -61,8 +61,13 @@
   // a name.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapAnonymous(const char* ashmem_name, uint8_t* addr, size_t byte_count, int prot,
-                              bool low_4gb, bool reuse, std::string* error_msg);
+  static MemMap* MapAnonymous(const char* ashmem_name,
+                              uint8_t* addr,
+                              size_t byte_count,
+                              int prot,
+                              bool low_4gb,
+                              bool reuse,
+                              std::string* error_msg);
 
   // Create placeholder for a region allocated by direct call to mmap.
   // This is useful when we do not have control over the code calling mmap,
@@ -74,21 +79,43 @@
   // "start" offset is absolute, not relative.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapFile(size_t byte_count, int prot, int flags, int fd, off_t start,
-                         const char* filename, std::string* error_msg) {
-    return MapFileAtAddress(
-        nullptr, byte_count, prot, flags, fd, start, false, filename, error_msg);
+  static MemMap* MapFile(size_t byte_count,
+                         int prot,
+                         int flags,
+                         int fd,
+                         off_t start,
+                         bool low_4gb,
+                         const char* filename,
+                         std::string* error_msg) {
+    return MapFileAtAddress(nullptr,
+                            byte_count,
+                            prot,
+                            flags,
+                            fd,
+                            start,
+                            /*low_4gb*/low_4gb,
+                            /*reuse*/false,
+                            filename,
+                            error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null then we do not print /proc/maps to the log if
+  // MapFileAtAddress fails. This helps improve performance of the fail case since reading and
+  // printing /proc/maps takes several milliseconds in the worst case.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
-  static MemMap* MapFileAtAddress(uint8_t* addr, size_t byte_count, int prot, int flags, int fd,
-                                  off_t start, bool reuse, const char* filename,
+  static MemMap* MapFileAtAddress(uint8_t* addr,
+                                  size_t byte_count,
+                                  int prot,
+                                  int flags,
+                                  int fd,
+                                  off_t start,
+                                  bool low_4gb,
+                                  bool reuse,
+                                  const char* filename,
                                   std::string* error_msg);
 
   // Releases the memory mapping.
@@ -138,7 +165,9 @@
   }
 
   // Unmap the pages at end and remap them to create another memory map.
-  MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot,
+  MemMap* RemapAtEnd(uint8_t* new_end,
+                     const char* tail_name,
+                     int tail_prot,
                      std::string* error_msg);
 
   static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map)
@@ -152,8 +181,14 @@
   static void Shutdown() REQUIRES(!Locks::mem_maps_lock_);
 
  private:
-  MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_begin, size_t base_size,
-         int prot, bool reuse, size_t redzone_size = 0) REQUIRES(!Locks::mem_maps_lock_);
+  MemMap(const std::string& name,
+         uint8_t* begin,
+         size_t size,
+         void* base_begin,
+         size_t base_size,
+         int prot,
+         bool reuse,
+         size_t redzone_size = 0) REQUIRES(!Locks::mem_maps_lock_);
 
   static void DumpMapsLocked(std::ostream& os, bool terse)
       REQUIRES(Locks::mem_maps_lock_);
@@ -164,6 +199,15 @@
   static bool ContainedWithinExistingMap(uint8_t* ptr, size_t size, std::string* error_msg)
       REQUIRES(!Locks::mem_maps_lock_);
 
+  // Internal version of mmap that supports low 4gb emulation.
+  static void* MapInternal(void* addr,
+                           size_t length,
+                           int prot,
+                           int flags,
+                           int fd,
+                           off_t offset,
+                           bool low_4gb);
+
   const std::string name_;
   uint8_t* const begin_;  // Start of data.
   size_t size_;  // Length of data.
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index 13bf5b7..edcbcf2 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -18,21 +18,36 @@
 
 #include <memory>
 
+#include "common_runtime_test.h"
 #include "base/memory_tool.h"
-
-#include "gtest/gtest.h"
+#include "base/unix_file/fd_file.h"
 
 namespace art {
 
-class MemMapTest : public testing::Test {
+class MemMapTest : public CommonRuntimeTest {
  public:
   static uint8_t* BaseBegin(MemMap* mem_map) {
     return reinterpret_cast<uint8_t*>(mem_map->base_begin_);
   }
+
   static size_t BaseSize(MemMap* mem_map) {
     return mem_map->base_size_;
   }
 
+  static uint8_t* GetValidMapAddress(size_t size, bool low_4gb) {
+    // Find a valid map address and unmap it before returning.
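+    // Note: this is inherently racy; the address may be reused by another mapping
+    // before the caller maps it, but a best-effort address suffices for tests.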
+    std::string error_msg;
+    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("temp",
+                                                     nullptr,
+                                                     size,
+                                                     PROT_READ,
+                                                     low_4gb,
+                                                     false,
+                                                     &error_msg));
+    CHECK(map != nullptr);
+    return map->Begin();
+  }
+
   static void RemapAtEndTest(bool low_4gb) {
     std::string error_msg;
     // Cast the page size to size_t.
@@ -164,14 +179,36 @@
   ASSERT_TRUE(error_msg.empty());
   ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
 }
+
+TEST_F(MemMapTest, MapFile32Bit) {
+  CommonInit();
+  std::string error_msg;
+  ScratchFile scratch_file;
+  constexpr size_t kMapSize = kPageSize;
+  std::unique_ptr<uint8_t[]> data(new uint8_t[kMapSize]());
+  ASSERT_TRUE(scratch_file.GetFile()->WriteFully(&data[0], kMapSize));
+  std::unique_ptr<MemMap> map(MemMap::MapFile(/*byte_count*/kMapSize,
+                                              PROT_READ,
+                                              MAP_PRIVATE,
+                                              scratch_file.GetFd(),
+                                              /*start*/0,
+                                              /*low_4gb*/true,
+                                              scratch_file.GetFilename().c_str(),
+                                              &error_msg));
+  ASSERT_TRUE(map != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_EQ(map->Size(), kMapSize);
+  ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
+}
 #endif
 
 TEST_F(MemMapTest, MapAnonymousExactAddr) {
   CommonInit();
   std::string error_msg;
+  // Find a valid address.
+  uint8_t* valid_address = GetValidMapAddress(kPageSize, /*low_4gb*/false);
   // Map at an address that should work, which should succeed.
   std::unique_ptr<MemMap> map0(MemMap::MapAnonymous("MapAnonymous0",
-                                                    reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS),
+                                                    valid_address,
                                                     kPageSize,
                                                     PROT_READ | PROT_WRITE,
                                                     false,
@@ -179,7 +216,7 @@
                                                     &error_msg));
   ASSERT_TRUE(map0.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
-  ASSERT_TRUE(map0->BaseBegin() == reinterpret_cast<void*>(ART_BASE_ADDRESS));
+  ASSERT_TRUE(map0->BaseBegin() == valid_address);
   // Map at an unspecified address, which should succeed.
   std::unique_ptr<MemMap> map1(MemMap::MapAnonymous("MapAnonymous1",
                                                     nullptr,
@@ -217,18 +254,27 @@
   CommonInit();
   // This test may not work under valgrind.
   if (RUNNING_ON_MEMORY_TOOL == 0) {
-    uintptr_t start_addr = ART_BASE_ADDRESS + 0x1000000;
+    constexpr size_t size = 0x100000;
+    // Try all addresses starting from 2GB to 4GB.
+    size_t start_addr = 2 * GB;
     std::string error_msg;
-    std::unique_ptr<MemMap> map(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
-                                                     reinterpret_cast<uint8_t*>(start_addr),
-                                                     0x21000000,
-                                                     PROT_READ | PROT_WRITE,
-                                                     true,
-                                                     false,
-                                                     &error_msg));
+    std::unique_ptr<MemMap> map;
+    for (; start_addr <= std::numeric_limits<uint32_t>::max() - size; start_addr += size) {
+      map.reset(MemMap::MapAnonymous("MapAnonymousExactAddr32bitHighAddr",
+                                     reinterpret_cast<uint8_t*>(start_addr),
+                                     size,
+                                     PROT_READ | PROT_WRITE,
+                                     /*low_4gb*/true,
+                                     false,
+                                     &error_msg));
+      if (map != nullptr) {
+        break;
+      }
+    }
     ASSERT_TRUE(map.get() != nullptr) << error_msg;
     ASSERT_TRUE(error_msg.empty());
+    ASSERT_GE(reinterpret_cast<uintptr_t>(map->End()), 2u * GB);
-    ASSERT_EQ(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), start_addr);
+    ASSERT_EQ(BaseBegin(map.get()), reinterpret_cast<void*>(start_addr));
   }
 }
 
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index ec7d758..b6f424b 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -394,6 +394,19 @@
   }
 }
 
+template <typename Visitor>
+inline void PointerArray::Fixup(mirror::PointerArray* dest,
+                                size_t pointer_size,
+                                const Visitor& visitor) {
+  for (size_t i = 0, count = GetLength(); i < count; ++i) {
+    void* ptr = GetElementPtrSize<void*>(i, pointer_size);
+    void* new_ptr = visitor(ptr);
+    if (ptr != new_ptr) {
+      dest->SetElementPtrSize<false, true>(i, new_ptr, pointer_size);
+    }
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index b27a884..50d77eb 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -122,7 +122,7 @@
   T Get(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_);
 
   T GetWithoutChecks(int32_t i) ALWAYS_INLINE SHARED_REQUIRES(Locks::mutator_lock_) {
-    DCHECK(CheckIsValidIndex(i));
+    DCHECK(CheckIsValidIndex(i)) << "i=" << i << " length=" << GetLength();
     return GetData()[i];
   }
 
@@ -190,6 +190,12 @@
   template<bool kTransactionActive = false, bool kUnchecked = false, typename T>
   void SetElementPtrSize(uint32_t idx, T element, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Fixup the pointers in the dest arrays by passing our pointers through the visitor. Only copies
+  // to dest if visitor(source_ptr) != source_ptr.
+  template <typename Visitor>
+  void Fixup(mirror::PointerArray* dest, size_t pointer_size, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 };
 
 }  // namespace mirror
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 19ee7f4..9e416dc 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -520,15 +520,6 @@
   }
 }
 
-inline void Class::SetVerifyErrorClass(Class* klass) {
-  CHECK(klass != nullptr) << PrettyClass(this);
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  }
-}
-
 template<VerifyObjectFlags kVerifyFlags>
 inline uint32_t Class::GetAccessFlags() {
   // Check class is loaded/retired or this is java.lang.String that has a
@@ -946,6 +937,57 @@
   return arr != nullptr ? arr->size() : 0u;
 }
 
+template <typename Visitor>
+inline void Class::FixupNativePointers(mirror::Class* dest,
+                                       size_t pointer_size,
+                                       const Visitor& visitor) {
+  // Update the field arrays.
+  LengthPrefixedArray<ArtField>* const sfields = GetSFieldsPtr();
+  LengthPrefixedArray<ArtField>* const new_sfields = visitor(sfields);
+  if (sfields != new_sfields) {
+    dest->SetSFieldsPtrUnchecked(new_sfields);
+  }
+  LengthPrefixedArray<ArtField>* const ifields = GetIFieldsPtr();
+  LengthPrefixedArray<ArtField>* const new_ifields = visitor(ifields);
+  if (ifields != new_ifields) {
+    dest->SetIFieldsPtrUnchecked(new_ifields);
+  }
+  // Update direct and virtual method arrays.
+  LengthPrefixedArray<ArtMethod>* direct_methods = GetDirectMethodsPtr();
+  LengthPrefixedArray<ArtMethod>* new_direct_methods = visitor(direct_methods);
+  if (direct_methods != new_direct_methods) {
+    dest->SetDirectMethodsPtrUnchecked(new_direct_methods);
+  }
+  LengthPrefixedArray<ArtMethod>* virtual_methods = GetVirtualMethodsPtr();
+  LengthPrefixedArray<ArtMethod>* new_virtual_methods = visitor(virtual_methods);
+  if (virtual_methods != new_virtual_methods) {
+    dest->SetVirtualMethodsPtr(new_virtual_methods);
+  }
+  // Update dex cache strings.
+  GcRoot<mirror::String>* strings = GetDexCacheStrings();
+  GcRoot<mirror::String>* new_strings = visitor(strings);
+  if (strings != new_strings) {
+    dest->SetDexCacheStrings(new_strings);
+  }
+  // Fix up embedded tables.
+  if (!IsTemp() && ShouldHaveEmbeddedImtAndVTable()) {
+    for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
+      ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
+      ArtMethod* new_method = visitor(method);
+      if (method != new_method) {
+        dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
+      }
+    }
+    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+      ArtMethod* method = GetEmbeddedImTableEntry(i, pointer_size);
+      ArtMethod* new_method = visitor(method);
+      if (method != new_method) {
+        dest->SetEmbeddedImTableEntry(i, new_method, pointer_size);
+      }
+    }
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9d01a1d..05a9039 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -57,6 +57,15 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+inline void Class::SetVerifyError(mirror::Object* error) {
+  CHECK(error != nullptr) << PrettyClass(this);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  }
+}
+
 void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
   Status old_status = h_this->GetStatus();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -85,36 +94,9 @@
       }
     }
 
-    // Stash current exception.
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
-    CHECK(old_exception.Get() != nullptr);
-    Class* eiie_class;
-    // Do't attempt to use FindClass if we have an OOM error since this can try to do more
-    // allocations and may cause infinite loops.
-    bool throw_eiie = (old_exception.Get() == nullptr);
-    if (!throw_eiie) {
-      std::string temp;
-      const char* old_exception_descriptor = old_exception->GetClass()->GetDescriptor(&temp);
-      throw_eiie = (strcmp(old_exception_descriptor, "Ljava/lang/OutOfMemoryError;") != 0);
-    }
-    if (throw_eiie) {
-      // Clear exception to call FindSystemClass.
-      self->ClearException();
-      eiie_class = Runtime::Current()->GetClassLinker()->FindSystemClass(
-          self, "Ljava/lang/ExceptionInInitializerError;");
-      CHECK(!self->IsExceptionPending());
-      // Only verification errors, not initialization problems, should set a verify error.
-      // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that
-      // case.
-      Class* exception_class = old_exception->GetClass();
-      if (!eiie_class->IsAssignableFrom(exception_class)) {
-        h_this->SetVerifyErrorClass(exception_class);
-      }
-    }
-
-    // Restore exception.
-    self->SetException(old_exception.Get());
+    // Remember the current exception.
+    CHECK(self->GetException() != nullptr);
+    h_this->SetVerifyError(self->GetException());
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
@@ -744,12 +726,12 @@
 void Class::SetPreverifiedFlagOnAllMethods(size_t pointer_size) {
   DCHECK(IsVerified());
   for (auto& m : GetDirectMethods(pointer_size)) {
-    if (!m.IsNative() && !m.IsAbstract()) {
+    if (!m.IsNative() && m.IsInvokable()) {
       m.SetPreverified();
     }
   }
   for (auto& m : GetVirtualMethods(pointer_size)) {
-    if (!m.IsNative() && !m.IsAbstract()) {
+    if (!m.IsNative() && m.IsInvokable()) {
       m.SetPreverified();
     }
   }
@@ -820,6 +802,18 @@
   }
 }
 
+mirror::Class* Class::GetCommonSuperClass(Handle<Class> klass) {
+  DCHECK(klass.Get() != nullptr);
+  DCHECK(!klass->IsInterface());
+  DCHECK(!IsInterface());
+  mirror::Class* common_super_class = this;
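+  // The walk terminates because java.lang.Object is a superclass of every class.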
+  while (!common_super_class->IsAssignableFrom(klass.Get())) {
+    common_super_class = common_super_class->GetSuperClass();
+  }
+  DCHECK(common_super_class != nullptr);
+  return common_super_class;
+}
+
 const char* Class::GetSourceFile() {
   const DexFile& dex_file = GetDexFile();
   const DexFile::ClassDef* dex_class_def = GetClassDef();
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 8219d69..0ab5b97 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -657,6 +657,10 @@
 
   ALWAYS_INLINE Class* GetSuperClass() SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Get the first common super class of `this` and `klass`. Never returns null.
+  // Both `this` and `klass` must be classes, not interfaces.
+  Class* GetCommonSuperClass(Handle<Class> klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
   void SetSuperClass(Class *new_super_class) SHARED_REQUIRES(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
     Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
@@ -1015,9 +1019,9 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* GetVerifyErrorClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* GetVerifyError() SHARED_REQUIRES(Locks::mutator_lock_) {
     // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_));
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
   }
 
   uint16_t GetDexClassDefIndex() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1157,8 +1161,15 @@
   ALWAYS_INLINE LengthPrefixedArray<ArtMethod>* GetVirtualMethodsPtrUnchecked()
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Fix up all of the native pointers in the class by running them through the visitor. Only sets
+  // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
+  // initialized to a copy of *this to prevent issues.
+  template <typename Visitor>
+  void FixupNativePointers(mirror::Class* dest, size_t pointer_size, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
-  void SetVerifyErrorClass(Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  void SetVerifyError(Object* error) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(Class* access_to, ArtField* field,
@@ -1185,7 +1196,7 @@
 
   bool ProxyDescriptorEquals(const char* match) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  // Check that the pointer size mathces the one in the class linker.
+  // Check that the pointer size matches the one in the class linker.
   ALWAYS_INLINE static void CheckPointerSize(size_t pointer_size);
 
   static MemberOffset EmbeddedImTableOffset(size_t pointer_size);
@@ -1230,8 +1241,9 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries.
-  HeapReference<Class> verify_error_class_;
+  // If class verify fails, we must return same error on subsequent tries. We may store either
+  // the class of the error, or an actual instance of Throwable here.
+  HeapReference<Object> verify_error_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index f8ccfb1..975af61 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -137,6 +137,32 @@
   }
 }
 
+template <typename Visitor>
+inline void DexCache::FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor) {
+  GcRoot<mirror::String>* src = GetStrings();
+  for (size_t i = 0, count = NumStrings(); i < count; ++i) {
+    // TODO: Probably don't need read barrier for most callers.
+    mirror::String* source = src[i].Read();
+    mirror::String* new_source = visitor(source);
+    if (source != new_source) {
+      dest[i] = GcRoot<mirror::String>(new_source);
+    }
+  }
+}
+
+template <typename Visitor>
+inline void DexCache::FixupResolvedTypes(GcRoot<mirror::Class>* dest, const Visitor& visitor) {
+  GcRoot<mirror::Class>* src = GetResolvedTypes();
+  for (size_t i = 0, count = NumResolvedTypes(); i < count; ++i) {
+    // TODO: Probably don't need read barrier for most callers.
+    mirror::Class* source = src[i].Read();
+    mirror::Class* new_source = visitor(source);
+    if (source != new_source) {
+      dest[i] = GcRoot<mirror::Class>(new_source);
+    }
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 3144553..32eb595 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -61,6 +61,14 @@
   void Fixup(ArtMethod* trampoline, size_t pointer_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  template <typename Visitor>
+  void FixupStrings(GcRoot<mirror::String>* dest, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  template <typename Visitor>
+  void FixupResolvedTypes(GcRoot<mirror::Class>* dest, const Visitor& visitor)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   String* GetLocation() SHARED_REQUIRES(Locks::mutator_lock_) {
     return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_));
   }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 5c12091..4603428 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -163,6 +163,7 @@
 #endif
 }
 
+template<bool kCasRelease>
 inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
@@ -181,10 +182,13 @@
         static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
     new_lw = lw;
     new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
-    // This CAS is a CAS release so that when GC updates all the fields of an object and then
-    // changes the object from gray to black, the field updates (stores) will be visible (won't be
-    // reordered after this CAS.)
-  } while (!CasLockWordWeakRelease(expected_lw, new_lw));
+    // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
+    // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
+    // an object and then changes the object from gray to black, the field updates (stores) will be
+    // visible (won't be reordered after this CAS.)
+  } while (!(kCasRelease ?
+             CasLockWordWeakRelease(expected_lw, new_lw) :
+             CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
 #elif USE_BROOKS_READ_BARRIER
   DCHECK(kUseBrooksReadBarrier);
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 5c6520f..71e704e 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -92,13 +92,13 @@
   void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_);
+
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
   void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_);
-#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
-  NO_RETURN
-#endif
+
+  template<bool kCasRelease = false>
   ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 45610dc..33aca03 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -109,12 +109,17 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
   DCHECK(utf != nullptr);
-  size_t char_count = CountModifiedUtf8Chars(utf);
-  return AllocFromModifiedUtf8(self, char_count, utf);
+  size_t byte_count = strlen(utf);
+  size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+  return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) {
+  return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in) {
+                                      const char* utf8_data_in, int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   SetStringCountVisitor visitor(utf16_length);
   String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
@@ -122,7 +127,7 @@
     return nullptr;
   }
   uint16_t* utf16_data_out = string->GetValue();
-  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
   return string;
 }
 
@@ -217,7 +222,7 @@
   const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
-  ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+  ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
   return result;
 }
 
@@ -254,7 +259,11 @@
   StackHandleScope<1> hs(self);
   Handle<String> string(hs.NewHandle(this));
   CharArray* result = CharArray::Alloc(self, GetLength());
-  memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+  if (result != nullptr) {
+    memcpy(result->GetData(), string->GetValue(), string->GetLength() * sizeof(uint16_t));
+  } else {
+    self->AssertPendingOOMException();
+  }
   return result;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index fbee2d7..e2cfb8d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -27,6 +27,7 @@
 template<class T> class Handle;
 struct StringOffsets;
 class StringPiece;
+class StubTest_ReadBarrierForRoot_Test;
 
 namespace mirror {
 
@@ -115,6 +116,10 @@
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                       const char* utf8_data_in, int32_t utf8_length)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -179,7 +184,7 @@
   static GcRoot<Class> java_lang_String_;
 
   friend struct art::StringOffsets;  // for verifying offset information
-  ART_FRIEND_TEST(ObjectTest, StringLength);  // for SetOffset and SetCount
+  ART_FRIEND_TEST(art::StubTest, ReadBarrierForRoot);  // For java_lang_String_.
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 116cbe9..9946eab 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -50,6 +50,10 @@
 static constexpr uint32_t kAccFastNative =           0x00080000;  // method (dex only)
 static constexpr uint32_t kAccMiranda =              0x00200000;  // method (dex only)
 static constexpr uint32_t kAccDefault =              0x00400000;  // method (runtime)
+// This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
+// if any particular method needs to be a default conflict. Used to figure out at runtime if
+// invoking this method will throw an exception.
+static constexpr uint32_t kAccDefaultConflict =      0x00800000;  // method (runtime)
 
 // Special runtime-only flags.
 // Interface and all its super-interfaces with default methods have been recursively initialized.
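
kAccDefaultConflict, like kAccDefault, only has meaning after the class linker has run LinkInterfaceMethods; at runtime it answers a single question, namely whether invoking the method must fail. A sketch of the kind of predicate that consumes such a flag — the function name and the IncompatibleClassChangeError detail are assumptions, not taken from this patch:

    #include <cstdint>

    static constexpr uint32_t kAccDefaultConflict = 0x00800000;  // Matches the hunk above.

    // Hypothetical predicate: once the class linker has marked a method as a
    // default-method conflict, invoking it should raise an error (for default
    // methods, IncompatibleClassChangeError) rather than dispatch.
    inline bool IsDefaultConflicting(uint32_t access_flags) {
      return (access_flags & kAccDefaultConflict) != 0;
    }
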
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 81e7e6d..19c71f6 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -27,7 +27,7 @@
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -485,10 +485,7 @@
         DCHECK(why == kTimedWaiting || why == kSleeping) << why;
         self->GetWaitConditionVariable()->TimedWait(self, ms, ns);
       }
-      if (self->IsInterruptedLocked()) {
-        was_interrupted = true;
-      }
-      self->SetInterruptedLocked(false);
+      was_interrupted = self->IsInterruptedLocked();
     }
   }
 
@@ -522,7 +519,7 @@
 
   monitor_lock_.Unlock(self);
 
-  if (was_interrupted) {
+  if (was_interrupted && interruptShouldThrow) {
     /*
      * We were interrupted while waiting, or somebody interrupted an
      * un-interruptible thread earlier and we're bailing out immediately.
@@ -534,9 +531,7 @@
       MutexLock mu(self, *self->GetWaitMutex());
       self->SetInterruptedLocked(false);
     }
-    if (interruptShouldThrow) {
-      self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
-    }
+    self->ThrowNewException("Ljava/lang/InterruptedException;", nullptr);
   }
 }
 
@@ -1034,15 +1029,15 @@
   for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) {
     // The verifier works in terms of the dex pcs of the monitor-enter instructions.
     // We want the registers used by those instructions (so we can read the values out of them).
-    uint16_t monitor_enter_instruction = code_item->insns_[monitor_dex_pc];
+    const Instruction* monitor_enter_instruction =
+        Instruction::At(&code_item->insns_[monitor_dex_pc]);
 
     // Quick sanity check.
-    if ((monitor_enter_instruction & 0xff) != Instruction::MONITOR_ENTER) {
-      LOG(FATAL) << "expected monitor-enter @" << monitor_dex_pc << "; was "
-                 << reinterpret_cast<void*>(monitor_enter_instruction);
-    }
+    CHECK_EQ(monitor_enter_instruction->Opcode(), Instruction::MONITOR_ENTER)
+      << "expected monitor-enter @" << monitor_dex_pc << "; was "
+      << reinterpret_cast<const void*>(monitor_enter_instruction);
 
-    uint16_t monitor_register = ((monitor_enter_instruction >> 8) & 0xff);
+    uint16_t monitor_register = monitor_enter_instruction->VRegA();
     uint32_t value;
     bool success = stack_visitor->GetVReg(m, monitor_register, kReferenceVReg, &value);
     CHECK(success) << "Failed to read v" << monitor_register << " of kind "
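
The rewritten sanity check leans on the instruction decoder instead of open-coded shifts: monitor-enter is a format-11x instruction, with the opcode in the low byte of the first 16-bit code unit and the register vAA in the high byte, which is exactly what the deleted `(insn >> 8) & 0xff` was extracting. A self-contained sketch of that layout, with a hand-rolled stand-in for the runtime's Instruction accessors:

    #include <cstdint>

    // Decode a format-11x dex instruction such as monitor-enter vAA from a
    // single 16-bit code unit: opcode in bits 0-7, register vAA in bits 8-15.
    struct Insn11x {
      uint8_t opcode;
      uint8_t vreg_a;
    };

    inline Insn11x DecodeInsn11x(uint16_t code_unit) {
      return {static_cast<uint8_t>(code_unit & 0xff),
              static_cast<uint8_t>((code_unit >> 8) & 0xff)};
    }
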
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 8b2f4d8..da6cf1f 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -149,8 +149,15 @@
   void operator=(const NullableScopedUtfChars&);
 };
 
-static jobject DexFile_openDexFileNative(
-    JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
+static jobject DexFile_openDexFileNative(JNIEnv* env,
+                                         jclass,
+                                         jstring javaSourceName,
+                                         jstring javaOutputName,
+                                         jint flags ATTRIBUTE_UNUSED,
+                                         // class_loader will be used for app images.
+                                         jobject class_loader ATTRIBUTE_UNUSED,
+                                         // dex_elements will be used for app images.
+                                         jobject dex_elements ATTRIBUTE_UNUSED) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == nullptr) {
     return 0;
@@ -159,7 +166,6 @@
   if (env->ExceptionCheck()) {
     return 0;
   }
-
   Runtime* const runtime = Runtime::Current();
   ClassLinker* linker = runtime->GetClassLinker();
   std::vector<std::unique_ptr<const DexFile>> dex_files;
@@ -441,7 +447,12 @@
   NATIVE_METHOD(DexFile, getDexOptNeeded,
                 "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative,
-                "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"),
+                "(Ljava/lang/String;"
+                "Ljava/lang/String;"
+                "I"
+                "Ljava/lang/ClassLoader;"
+                "[Ldalvik/system/DexPathList$Element;"
+                ")Ljava/lang/Object;"),
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
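
The registration string for openDexFileNative is a run of adjacent C string literals, which the compiler concatenates into one JNI descriptor; splitting it one parameter per line just makes the two new reference arguments easy to review. For reference, the concatenated form (the variable is purely illustrative):

    // Equivalent single-literal form of the descriptor registered above.
    static const char* const kOpenDexFileNativeSig =
        "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;"
        "[Ldalvik/system/DexPathList$Element;)Ljava/lang/Object;";
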
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4c5dc3a..b49d68f 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -562,17 +562,20 @@
 
 /*
  * This is called by the framework when it knows the application directory and
- * process name.  We use this information to start up the sampling profiler for
- * for ART.
+ * process name.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
-                                      jstring appDir ATTRIBUTE_UNUSED,
+static void VMRuntime_registerAppInfo(JNIEnv* env,
+                                      jclass clazz ATTRIBUTE_UNUSED,
+                                      jstring pkgName,
+                                      jstring appDir,
                                       jstring procName ATTRIBUTE_UNUSED) {
-  const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
-  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+  const char* appDirChars = env->GetStringUTFChars(appDir, nullptr);
+  const char* pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
+  std::string profileFile = StringPrintf("%s/code_cache/%s.prof", appDirChars, pkgNameChars);
 
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+  Runtime::Current()->SetJitProfilingFilename(profileFile.c_str());
 
+  env->ReleaseStringUTFChars(appDir, appDirChars);
   env->ReleaseStringUTFChars(pkgName, pkgNameChars);
 }
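
registerAppInfo now derives the JIT profile path from the application directory rather than a fixed dalvik-cache location, and releases both JNI string buffers it borrows. The path construction, restated as a small helper with example values; the helper name is illustrative:

    #include <string>

    // Mirrors the StringPrintf("%s/code_cache/%s.prof", ...) construction above.
    std::string MakeProfilePath(const std::string& app_dir, const std::string& pkg_name) {
      return app_dir + "/code_cache/" + pkg_name + ".prof";
    }

    // MakeProfilePath("/data/user/0/com.example", "com.example")
    //   -> "/data/user/0/com.example/code_cache/com.example.prof"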
 
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 1d06706..ae1a4d7 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -64,8 +64,7 @@
     DEBUG_ENABLE_ASSERT             = 1 << 2,
     DEBUG_ENABLE_SAFEMODE           = 1 << 3,
     DEBUG_ENABLE_JNI_LOGGING        = 1 << 4,
-    DEBUG_ENABLE_JIT                = 1 << 5,
-    DEBUG_GENERATE_DEBUG_INFO       = 1 << 6,
+    DEBUG_GENERATE_DEBUG_INFO       = 1 << 5,
   };
 
   Runtime* const runtime = Runtime::Current();
@@ -97,21 +96,10 @@
   if (safe_mode) {
     // Ensure that any (secondary) oat files will be interpreted.
     runtime->AddCompilerOption("--compiler-filter=interpret-only");
+    runtime->SetSafeMode(true);
     debug_flags &= ~DEBUG_ENABLE_SAFEMODE;
   }
 
-  bool use_jit = false;
-  if ((debug_flags & DEBUG_ENABLE_JIT) != 0) {
-    if (safe_mode) {
-      LOG(INFO) << "Not enabling JIT due to safe mode";
-    } else {
-      use_jit = true;
-      LOG(INFO) << "Late-enabling JIT";
-    }
-    debug_flags &= ~DEBUG_ENABLE_JIT;
-  }
-  runtime->GetJITOptions()->SetUseJIT(use_jit);
-
   const bool generate_debug_info = (debug_flags & DEBUG_GENERATE_DEBUG_INFO) != 0;
   if (generate_debug_info) {
     runtime->AddCompilerOption("--generate-debug-info");
@@ -171,23 +159,17 @@
         proc_name = StringPrintf("%u", static_cast<uint32_t>(pid));
       }
 
-      std::string profiles_dir(GetDalvikCache("profiles", false /* create_if_absent */));
-      if (!profiles_dir.empty()) {
-        std::string trace_file = StringPrintf("%s/%s.trace.bin", profiles_dir.c_str(),
-                                              proc_name.c_str());
-        Trace::Start(trace_file.c_str(),
-                     -1,
-                     buffer_size,
-                     0,   // TODO: Expose flags.
-                     output_mode,
-                     trace_mode,
-                     0);  // TODO: Expose interval.
-        if (thread->IsExceptionPending()) {
-          ScopedObjectAccess soa(env);
-          thread->ClearException();
-        }
-      } else {
-        LOG(ERROR) << "Profiles dir is empty?!?!";
+      std::string trace_file = StringPrintf("/data/misc/trace/%s.trace.bin", proc_name.c_str());
+      Trace::Start(trace_file.c_str(),
+                   -1,
+                   buffer_size,
+                   0,   // TODO: Expose flags.
+                   output_mode,
+                   trace_mode,
+                   0);  // TODO: Expose interval.
+      if (thread->IsExceptionPending()) {
+        ScopedObjectAccess soa(env);
+        thread->ClearException();
       }
     }
   }
@@ -199,9 +181,9 @@
     if (isa != kNone && isa != kRuntimeISA) {
       action = Runtime::NativeBridgeAction::kInitialize;
     }
-    Runtime::Current()->DidForkFromZygote(env, action, isa_string.c_str());
+    Runtime::Current()->InitNonZygoteOrPostFork(env, action, isa_string.c_str());
   } else {
-    Runtime::Current()->DidForkFromZygote(env, Runtime::NativeBridgeAction::kUnload, nullptr);
+    Runtime::Current()->InitNonZygoteOrPostFork(env, Runtime::NativeBridgeAction::kUnload, nullptr);
   }
 }
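
With DEBUG_ENABLE_JIT gone, DEBUG_GENERATE_DEBUG_INFO moves down to bit 5, and the hook keeps its existing pattern of clearing each debug-flag bit as it is consumed so leftover bits can be reported as unknown. That consume-and-clear pattern as a sketch over a plain flag word, with nothing assumed beyond the bit layout above:

    #include <cstdint>

    // Test a debug-flag bit and clear it, so any bits still set afterwards can
    // be diagnosed as unknown flags.
    inline bool TakeFlag(uint32_t* flags, uint32_t bit) {
      const bool set = (*flags & bit) != 0;
      *flags &= ~bit;
      return set;
    }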
 
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 680f4ac..83e594b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -52,10 +52,10 @@
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +64,140 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and descendants:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable)
+// 2) Load() to try to open the file.
+// 3) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 4) PreSetup() for any steps that should be done before the final setup.
+// 5) Setup() to complete the procedure.
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbsslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +220,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -630,6 +422,486 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
+static bool RegisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
+    }
+    return false;
+  }
+  return true;
+}
+
+static void UnregisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
+    }
+  }
+}
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        first_oat_(RegisterOatFileLocation(filename)) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    UnregisterOatFileLocation(GetLocation());
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmapped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // Track the registration status (= was this the first oat file) for the location.
+  const bool first_oat_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
+    // stored in the first_oat_ flag.
+    if (!first_oat_) {
+      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr function is missing.  There is a similar API on OS X,
+  // but let's fall back to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef __ANDROID__
+    android_dlextinfo extinfo;
+    // ANDROID_DLEXT_FORCE_LOAD: force-load, don't reuse the handle (open oat
+    // files multiple times). ANDROID_DLEXT_FORCE_FIXED_VADDR: take a non-zero
+    // vaddr as absolute (non-pic boot image).
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
+    if (oat_file_begin != nullptr) {
+      // Use the requested address if the encoded vaddr is 0 (pic boot image).
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;
+      extinfo.reserved_addr = oat_file_begin;
+    }
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    UNUSED(oat_file_begin);
+#endif
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+  } context = { Begin(), &dlopen_mmaps_ };
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+  }
+#endif
+}
+
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   std::string* error_msg);
+
+  // Backing memory map for oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file, oat_file_begin, writable, executable, error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      std::string* error_msg) {
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             std::string* error_msg) {
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+  std::unique_ptr<OatFile> ret;
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 false,
+                                                                 executable,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+  }
+
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple of reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  // Another independent reason is the absolute placement of boot.oat. dlopen on the host
+  // usually honors the virtual address encoded in the ELF file only for ET_EXEC files, not
+  // ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                false,
+                                                                executable,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
+
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 true,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 false,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
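
The restructuring of oat_file.cc is a template-method split: OatFileBase::OpenOatFile drives the fixed Load → ComputeFields → PreSetup → Setup sequence from the note above, while DlOpenOatFile and ElfOatFile supply only the loader-specific virtuals. A compressed sketch of that shape, under purely illustrative names and with all real work elided:

    #include <memory>
    #include <string>

    // Skeleton of the loader pipeline; fields, parameters, and bodies are elided.
    class LoaderBase {
     public:
      virtual ~LoaderBase() {}

      template <typename SubType>
      static std::unique_ptr<LoaderBase> Open(const std::string& path, std::string* error) {
        std::unique_ptr<LoaderBase> ret(new SubType());   // 1) Allocate the subtype.
        if (!ret->Load(path, error)) {                    // 2) Loader-specific open.
          return nullptr;
        }
        if (!ret->ComputeFields(error)) {                 // 3) Shared symbol lookup.
          return nullptr;
        }
        ret->PreSetup(path);                              // 4) Loader-specific bookkeeping.
        if (!ret->Setup(error)) {                         // 5) Shared validation.
          return nullptr;
        }
        return ret;
      }

     protected:
      virtual bool Load(const std::string& path, std::string* error) = 0;
      virtual void PreSetup(const std::string& path) = 0;
      bool ComputeFields(std::string* /* error */) { return true; }
      bool Setup(std::string* /* error */) { return true; }
    };

    // One concrete loader is enough to instantiate the template.
    class DlOpenLikeLoader final : public LoaderBase {
     protected:
      bool Load(const std::string& /* path */, std::string* /* error */) override { return true; }
      void PreSetup(const std::string& /* path */) override {}
    };

    // Usage: auto f = LoaderBase::Open<DlOpenLikeLoader>("/path/to.oat", &error_msg);
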
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 0a77654..dbd7541 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,7 +40,7 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
 
@@ -74,7 +74,7 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
@@ -85,12 +85,6 @@
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
-
   const std::string& GetLocation() const {
     return location_;
   }
@@ -260,35 +254,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +278,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -356,6 +313,7 @@
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -426,6 +384,7 @@
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
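
Turning ~OatFile virtual is what makes the subclassing above safe: an oat file is routinely deleted through an OatFile*, and the derived destructor (which for the dlopen loader must dlclose its handle) only runs if the base destructor is virtual. A minimal illustration with stand-in types:

    #include <memory>

    struct Base {
      virtual ~Base() {}  // Without 'virtual', deleting via Base* skips ~Derived.
    };

    struct Derived final : Base {
      ~Derived() override { /* e.g. dlclose(handle_) in the real loader */ }
    };

    void Demo() {
      std::unique_ptr<Base> f(new Derived());  // ~Derived runs when f is destroyed.
    }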
 
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 99080f6..0f3a013 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -846,7 +846,7 @@
 
 std::string OatFileAssistant::ImageLocation() {
   Runtime* runtime = Runtime::Current();
-  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+  const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
   if (image_space == nullptr) {
     return "";
   }
@@ -949,21 +949,23 @@
     image_info_load_attempted_ = true;
 
     Runtime* runtime = Runtime::Current();
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
     if (image_space != nullptr) {
       cached_image_info_.location = image_space->GetImageLocation();
 
       if (isa_ == kRuntimeISA) {
         const ImageHeader& image_header = image_space->GetImageHeader();
         cached_image_info_.oat_checksum = image_header.GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
+        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
+            image_header.GetOatDataBegin());
         cached_image_info_.patch_delta = image_header.GetPatchDelta();
       } else {
         std::unique_ptr<ImageHeader> image_header(
             gc::space::ImageSpace::ReadImageHeaderOrDie(
                 cached_image_info_.location.c_str(), isa_));
         cached_image_info_.oat_checksum = image_header->GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
+            image_header->GetOatDataBegin());
         cached_image_info_.patch_delta = image_header->GetPatchDelta();
       }
     }
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index c54d7f8..8c7efb2 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -223,7 +223,7 @@
         false, dex_location.c_str(), &error_msg));
     ASSERT_TRUE(odex_file.get() != nullptr) << error_msg;
 
-    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetImageSpace();
+    const gc::space::ImageSpace* image_space = runtime->GetHeap()->GetBootImageSpace();
     ASSERT_TRUE(image_space != nullptr);
     const ImageHeader& image_header = image_space->GetImageHeader();
     const OatHeader& oat_header = odex_file->GetOatHeader();
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 9eee156..ea6d3ff 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -79,11 +79,8 @@
 }
 
 const OatFile* OatFileManager::GetBootOatFile() const {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image_space == nullptr) {
-    return nullptr;
-  }
-  return image_space->GetOatFile();
+  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
+  return (image_space == nullptr) ? nullptr : image_space->GetOatFile();
 }
 
 const OatFile* OatFileManager::GetPrimaryOatFile() const {
diff --git a/runtime/os.h b/runtime/os.h
index befe2e8..46d89fb 100644
--- a/runtime/os.h
+++ b/runtime/os.h
@@ -39,6 +39,10 @@
   // already exists, it is *not* overwritten, but unlinked, and a new inode will be used.
   static File* CreateEmptyFile(const char* name);
 
+  // Create an empty file with write-only access. This is a *new* file, that is, if the file
+  // already exists, it is *not* overwritten, but unlinked, and a new inode will be used.
+  static File* CreateEmptyFileWriteOnly(const char* name);
+
   // Open a file with the specified open(2) flags.
   static File* OpenFileWithFlags(const char* name, int flags);
 
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 675699d..f45e9f6 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -35,12 +35,20 @@
   return OpenFileWithFlags(name, O_RDWR);
 }
 
-File* OS::CreateEmptyFile(const char* name) {
+static File* CreateEmptyFile(const char* name, int extra_flags) {
   // In case the file exists, unlink it so we get a new file. This is necessary as the previous
   // file may be in use and must not be changed.
   unlink(name);
 
-  return OpenFileWithFlags(name, O_RDWR | O_CREAT | O_TRUNC);
+  return OS::OpenFileWithFlags(name, O_CREAT | extra_flags);
+}
+
+File* OS::CreateEmptyFile(const char* name) {
+  return art::CreateEmptyFile(name, O_RDWR | O_TRUNC);
+}
+
+File* OS::CreateEmptyFileWriteOnly(const char* name) {
+  return art::CreateEmptyFile(name, O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC);
 }
 
 File* OS::OpenFileWithFlags(const char* name, int flags) {
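The refactoring above shares the unlink-then-create step between both variants, while the
write-only variant adds O_NOFOLLOW and O_CLOEXEC. As a rough sketch of the effective
syscall sequence (the 0666 create mode is an assumption; OpenFileWithFlags' mode argument
is not shown in this hunk):

    // Illustration only: what OS::CreateEmptyFileWriteOnly(name) boils down to.
    unlink(name);  // Drop any existing inode; an in-use file must not be modified.
    // O_NOFOLLOW fails with ELOOP if 'name' is a symlink, hardening output paths;
    // O_CLOEXEC keeps the descriptor from leaking into exec()'d children.
    int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC, 0666);

The trace.cc hunk later in this patch switches trace files to the write-only variant,
since trace output is only ever written, never read back by the runtime.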
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index ae16c7f..7f4519c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -152,15 +152,21 @@
           .WithType<bool>()
           .WithValueMap({{"false", false}, {"true", true}})
           .IntoKey(M::UseJIT)
-      .Define("-Xjitcodecachesize:_")
+      .Define("-Xjitinitialsize:_")
           .WithType<MemoryKiB>()
-          .IntoKey(M::JITCodeCacheCapacity)
+          .IntoKey(M::JITCodeCacheInitialCapacity)
+      .Define("-Xjitmaxsize:_")
+          .WithType<MemoryKiB>()
+          .IntoKey(M::JITCodeCacheMaxCapacity)
       .Define("-Xjitthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITCompileThreshold)
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitsaveprofilinginfo")
+          .WithValue(true)
+          .IntoKey(M::JITSaveProfilingInfo)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -553,7 +559,9 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit. The growth limit must always be <= the heap size.
+  if (args.GetOrDefault(M::HeapGrowthLimit) <= 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
@@ -640,7 +648,6 @@
   UsageMessage(stream, "  -XX:ForegroundHeapGrowthMultiplier=doublevalue\n");
   UsageMessage(stream, "  -XX:LowMemoryMode\n");
   UsageMessage(stream, "  -Xprofile:{threadcpuclock,wallclock,dualclock}\n");
-  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitthreshold:integervalue\n");
   UsageMessage(stream, "\n");
 
@@ -684,6 +691,8 @@
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Xpatchoat:filename\n");
   UsageMessage(stream, "  -Xusejit:booleanvalue\n");
+  UsageMessage(stream, "  -Xjitinitialsize:N\n");
+  UsageMessage(stream, "  -Xjitmaxsize:N\n");
   UsageMessage(stream, "  -X[no]relocate\n");
   UsageMessage(stream, "  -X[no]dex2oat (Whether to invoke dex2oat on the application)\n");
   UsageMessage(stream, "  -X[no]image-dex2oat (Whether to create and use a boot image)\n");
@@ -718,6 +727,7 @@
   UsageMessage(stream, "  -Xjitblocking\n");
   UsageMessage(stream, "  -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n");
   UsageMessage(stream, "  -Xjitclass:classname[,classname]*\n");
+  UsageMessage(stream, "  -Xjitcodecachesize:N\n");
   UsageMessage(stream, "  -Xjitoffset:offset[,offset]\n");
   UsageMessage(stream, "  -Xjitconfig:filename\n");
   UsageMessage(stream, "  -Xjitcheckcg\n");
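For embedders, the renamed and added options travel through the normal JNI invocation
interface. A hedged sketch (the sizes are arbitrary; the "64k"/"16m" suffix syntax is
assumed to match the other MemoryKiB options such as -Xmx):

    // Sketch: start a VM with split JIT code cache sizes and profile saving enabled.
    JavaVMOption options[3];
    options[0].optionString = const_cast<char*>("-Xjitinitialsize:64k");
    options[1].optionString = const_cast<char*>("-Xjitmaxsize:16m");
    options[2].optionString = const_cast<char*>("-Xjitsaveprofilinginfo");
    JavaVMInitArgs args;
    args.version = JNI_VERSION_1_6;
    args.nOptions = 3;
    args.options = options;
    args.ignoreUnrecognized = JNI_FALSE;
    JavaVM* vm = nullptr;
    JNIEnv* env = nullptr;
    JNI_CreateJavaVM(&vm, reinterpret_cast<void**>(&env), &args);

Note that -Xjitcodecachesize: is not deleted outright; it moves to the list of
recognized-but-ignored legacy JIT flags, since a single size no longer maps onto the
initial/max split.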
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index c7c2709..6234720 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -507,6 +507,8 @@
 
 TEST_F(ReflectionTest, StaticMainMethod) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
   StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 556ba56..fe8eb0d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -208,7 +208,8 @@
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
-      is_low_memory_mode_(false) {
+      is_low_memory_mode_(false),
+      safe_mode_(false) {
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
 }
@@ -217,6 +218,9 @@
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
+
+  MaybeSaveJitProfilingInfo();
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -528,13 +532,13 @@
   // Use !IsAotCompiler so that we get test coverage, tests are never the zygote.
   if (!IsAotCompiler()) {
     ScopedObjectAccess soa(self);
-    gc::space::ImageSpace* image_space = heap_->GetImageSpace();
+    gc::space::ImageSpace* image_space = heap_->GetBootImageSpace();
     if (image_space != nullptr) {
       ATRACE_BEGIN("AddImageStringsToTable");
       GetInternTable()->AddImageStringsToTable(image_space);
       ATRACE_END();
       ATRACE_BEGIN("MoveImageClassesToClassTable");
-      GetClassLinker()->MoveImageClassesToClassTable();
+      GetClassLinker()->AddBootImageClassesToClassTable();
       ATRACE_END();
     }
   }
@@ -573,8 +577,9 @@
     if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
-    DidForkFromZygote(self->GetJniEnv(), NativeBridgeAction::kInitialize,
-                      GetInstructionSetString(kRuntimeISA));
+    InitNonZygoteOrPostFork(self->GetJniEnv(),
+                            NativeBridgeAction::kInitialize,
+                            GetInstructionSetString(kRuntimeISA));
   }
 
   ATRACE_BEGIN("StartDaemonThreads");
@@ -599,7 +604,6 @@
       LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
       return true;
     }
-    StartProfiler(profile_output_filename_.c_str());
   }
 
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
@@ -663,7 +667,7 @@
 #endif
 }
 
-void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa) {
+void Runtime::InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
   if (is_native_bridge_loaded_) {
@@ -685,8 +689,9 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-  if (jit_.get() == nullptr && jit_options_->UseJIT()) {
-    // Create the JIT if the flag is set and we haven't already create it (happens for run-tests).
+  if (!safe_mode_ && jit_options_->UseJIT() && jit_.get() == nullptr) {
+    // Note that when running ART standalone (neither zygote nor a zygote fork),
+    // the JIT may have already been created.
     CreateJit();
   }
 
@@ -770,7 +775,7 @@
   std::unique_ptr<const OatFile> oat_file(
       OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
   if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
+    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
 
@@ -820,6 +825,7 @@
 void Runtime::SetSentinel(mirror::Object* sentinel) {
   CHECK(sentinel_.Read() == nullptr);
   CHECK(sentinel != nullptr);
+  CHECK(!heap_->IsMovableObject(sentinel));
   sentinel_ = GcRoot<mirror::Object>(sentinel);
 }
 
@@ -930,7 +936,7 @@
                        runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs));
   ATRACE_END();
 
-  if (heap_->GetImageSpace() == nullptr && !allow_dex_file_fallback_) {
+  if (heap_->GetBootImageSpace() == nullptr && !allow_dex_file_fallback_) {
     LOG(ERROR) << "Dex file fallback disabled, cannot continue without image.";
     ATRACE_END();
     return false;
@@ -1040,10 +1046,15 @@
   class_linker_ = new ClassLinker(intern_table_);
   if (GetHeap()->HasImageSpace()) {
     ATRACE_BEGIN("InitFromImage");
-    class_linker_->InitFromImage();
+    std::string error_msg;
+    bool result = class_linker_->InitFromImage(&error_msg);
     ATRACE_END();
+    if (!result) {
+      LOG(ERROR) << "Could not initialize from image: " << error_msg;
+      return false;
+    }
     if (kIsDebugBuild) {
-      GetHeap()->GetImageSpace()->VerifyImageAllocations();
+      GetHeap()->GetBootImageSpace()->VerifyImageAllocations();
     }
     if (boot_class_path_string_.empty()) {
       // The bootclasspath is not explicitly specified: construct it from the loaded dex files.
@@ -1073,7 +1084,11 @@
                  runtime_options.GetOrDefault(Opt::Image),
                  &boot_class_path);
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
-    class_linker_->InitWithoutImage(std::move(boot_class_path));
+    std::string error_msg;
+    if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
+      LOG(ERROR) << "Could not initialize without image: " << error_msg;
+      return false;
+    }
 
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(instruction_set_);
@@ -1605,10 +1620,8 @@
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
-void Runtime::StartProfiler(const char* profile_output_filename) {
+void Runtime::SetJitProfilingFilename(const char* profile_output_filename) {
   profile_output_filename_ = profile_output_filename;
-  profiler_started_ =
-      BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
 }
 
 // Transaction support.
@@ -1754,8 +1767,16 @@
   argv->push_back(feature_string);
 }
 
+void Runtime::MaybeSaveJitProfilingInfo() {
+  if (jit_.get() != nullptr && !profile_output_filename_.empty()) {
+    jit_->SaveProfilingInfo(profile_output_filename_);
+  }
+}
+
 void Runtime::UpdateProfilerState(int state) {
-  VLOG(profiler) << "Profiler state updated to " << state;
+  if (state == kProfileBackground) {
+    MaybeSaveJitProfilingInfo();
+  }
 }
 
 void Runtime::CreateJit() {
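Profile capture thus moves from the sampling profiler to the JIT itself.
SetJitProfilingFilename() only records the path; the data is flushed in the destructor
(see the MaybeSaveJitProfilingInfo() call added above) and on background transitions.
A hedged caller-side sketch (the actual call sites live in framework glue outside this
patch, and the path is hypothetical):

    Runtime* runtime = Runtime::Current();
    runtime->SetJitProfilingFilename("/data/misc/profiles/com.example.app.prof");
    // ... later, the framework reports a process state change. State 1 is
    // kProfileBackground, which now triggers MaybeSaveJitProfilingInfo().
    runtime->UpdateProfilerState(1);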
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7b1fdb2..bd36414 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -151,6 +151,11 @@
     return compiler_callbacks_;
   }
 
+  void SetCompilerCallbacks(CompilerCallbacks* callbacks) {
+    CHECK(callbacks != nullptr);
+    compiler_callbacks_ = callbacks;
+  }
+
   bool IsZygote() const {
     return is_zygote_;
   }
@@ -442,7 +447,7 @@
 
   void PreZygoteFork();
   bool InitZygote();
-  void DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa);
+  void InitNonZygoteOrPostFork(JNIEnv* env, NativeBridgeAction action, const char* isa);
 
   const instrumentation::Instrumentation* GetInstrumentation() const {
     return &instrumentation_;
@@ -452,7 +457,7 @@
     return &instrumentation_;
   }
 
-  void StartProfiler(const char* profile_output_filename);
+  void SetJitProfilingFilename(const char* profile_output_filename);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -583,6 +588,10 @@
   double GetHashTableMinLoadFactor() const;
   double GetHashTableMaxLoadFactor() const;
 
+  void SetSafeMode(bool mode) {
+    safe_mode_ = mode;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -599,12 +608,14 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  void MaybeSaveJitProfilingInfo();
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
   // NOTE: these must match the gc::ProcessState values as they come directly from the framework.
   static constexpr int kProfileForground = 0;
-  static constexpr int kProfileBackgrouud = 1;
+  static constexpr int kProfileBackground = 1;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kLastCalleeSaveType];
@@ -786,6 +797,9 @@
   // Whether or not we are on a low RAM device.
   bool is_low_memory_mode_;
 
+  // Whether the application should run in safe mode, that is, interpreter only.
+  bool safe_mode_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
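SetSafeMode() is the switch that the new JIT guard in runtime.cc keys off. A sketch of
the presumed use, assuming the caller is the zygote fork path honoring an app's
vmSafeMode request (the actual caller is outside this patch):

    // Before finishing app startup:
    Runtime::Current()->SetSafeMode(true);
    // With safe_mode_ set, InitNonZygoteOrPostFork() skips CreateJit(), so the app
    // runs without JIT-compiled code, matching the "interpreter only" intent noted
    // on the safe_mode_ field.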
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 7b5bc1a..5624285 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -69,7 +69,9 @@
 RUNTIME_OPTIONS_KEY (bool,                UseJIT,                         false)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITCompileThreshold,            jit::Jit::kDefaultCompileThreshold)
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
-RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheCapacity,           jit::JitCodeCache::kDefaultCapacity)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
+RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
+RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
@@ -80,10 +82,7 @@
 RUNTIME_OPTIONS_KEY (bool,                Relocate,                       kDefaultMustRelocate)
 RUNTIME_OPTIONS_KEY (bool,                Dex2Oat,                        true)
 RUNTIME_OPTIONS_KEY (bool,                ImageDex2Oat,                   true)
-                                                        // kUseReadBarrier currently works with
-                                                        // the interpreter only.
-                                                        // TODO: make it work with the compiler.
-RUNTIME_OPTIONS_KEY (bool,                Interpret,                      kUseReadBarrier) // -Xint
+RUNTIME_OPTIONS_KEY (bool,                Interpret,                      false) // -Xint
                                                         // Disable the compiler for CC (for now).
 RUNTIME_OPTIONS_KEY (XGcOption,           GcOption)  // -Xgc:
 RUNTIME_OPTIONS_KEY (gc::space::LargeObjectSpaceType, \
@@ -97,7 +96,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
-RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/method-trace-file.bin")
+RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
 RUNTIME_OPTIONS_KEY (unsigned int,        MethodTraceFileSize,            10 * MB)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTraceStreaming)
 RUNTIME_OPTIONS_KEY (TraceClockSource,    ProfileClock,                   kDefaultTraceClockSource)  // -Xprofile:
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index 7ac17b6..4e62dda 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -92,7 +92,7 @@
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
   }
-  iterator Put(const K& k, const V&& v) {
+  iterator Put(const K& k, V&& v) {
     std::pair<iterator, bool> result = map_.emplace(k, std::move(v));
     DCHECK(result.second);  // Check we didn't accidentally overwrite an existing value.
     return result.first;
@@ -105,7 +105,7 @@
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
     return map_.emplace_hint(pos, k, v);
   }
-  iterator PutBefore(iterator pos, const K& k, const V&& v) {
+  iterator PutBefore(iterator pos, const K& k, V&& v) {
     // Check that we're using the correct position and the key is not in the map.
     DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first));
     DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k));
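Dropping const from these rvalue-reference overloads is a real fix, not churn:
std::move() on a 'const V' yields 'const V&&', which cannot bind to a move constructor,
so the emplace inside would copy, or fail to compile for a move-only V. A minimal
sketch (Foo is a stand-in type):

    // With the old 'const V&& v' signature this would not compile: std::move(v)
    // produces 'const std::unique_ptr<Foo>&&', which unique_ptr cannot move from.
    // With 'V&& v' the value is moved into the map as intended.
    SafeMap<int, std::unique_ptr<Foo>> map;
    map.Put(1, std::unique_ptr<Foo>(new Foo));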
diff --git a/runtime/stack.cc b/runtime/stack.cc
index d7edfad..9098d38 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -265,7 +265,12 @@
     }
   } else {
     DCHECK(cur_shadow_frame_ != nullptr);
-    *val = cur_shadow_frame_->GetVReg(vreg);
+    if (kind == kReferenceVReg) {
+      *val = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(
+          cur_shadow_frame_->GetVRegReference(vreg)));
+    } else {
+      *val = cur_shadow_frame_->GetVReg(vreg);
+    }
     return true;
   }
 }
@@ -481,52 +486,10 @@
   return true;
 }
 
-bool StackVisitor::SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value,
+bool StackVisitor::SetVReg(ArtMethod* m,
+                           uint16_t vreg,
+                           uint32_t new_value,
                            VRegKind kind) {
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegFromQuickCode(m, vreg, new_value, kind);
-    }
-  } else {
-    cur_shadow_frame_->SetVReg(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                                        VRegKind kind) {
-  DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-  DCHECK(m == GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind, &vmap_offset)) {
-    bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg = vmap_table.ComputeRegister(spill_mask, vmap_offset, kind);
-    return SetRegisterIfAccessible(reg, new_value, kind);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *addr = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegFromDebugger(ArtMethod* m,
-                                       uint16_t vreg,
-                                       uint32_t new_value,
-                                       VRegKind kind) {
   const DexFile::CodeItem* code_item = m->GetCodeItem();
   if (code_item == nullptr) {
     return false;
@@ -551,93 +514,11 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind) {
-  const bool is_float = (kind == kFloatVReg) || (kind == kDoubleLoVReg) || (kind == kDoubleHiVReg);
-  if (!IsAccessibleRegister(reg, is_float)) {
-    return false;
-  }
-  const bool target64 = Is64BitInstructionSet(kRuntimeISA);
-
-  // Create a new value that can hold both low 32 and high 32 bits, in
-  // case we are running 64 bits.
-  uintptr_t full_new_value = new_value;
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    bool wide_lo = (kind == kLongLoVReg) || (kind == kDoubleLoVReg);
-    bool wide_hi = (kind == kLongHiVReg) || (kind == kDoubleHiVReg);
-    if (wide_lo || wide_hi) {
-      uintptr_t old_reg_val = GetRegister(reg, is_float);
-      uint64_t new_vreg_portion = static_cast<uint64_t>(new_value);
-      uint64_t old_reg_val_as_wide = static_cast<uint64_t>(old_reg_val);
-      uint64_t mask = 0xffffffff;
-      if (wide_lo) {
-        mask = mask << 32;
-      } else {
-        new_vreg_portion = new_vreg_portion << 32;
-      }
-      full_new_value = static_cast<uintptr_t>((old_reg_val_as_wide & mask) | new_vreg_portion);
-    }
-  }
-  SetRegister(reg, full_new_value, is_float);
-  return true;
-}
-
-bool StackVisitor::SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi) {
-  if (kind_lo == kLongLoVReg) {
-    DCHECK_EQ(kind_hi, kLongHiVReg);
-  } else if (kind_lo == kDoubleLoVReg) {
-    DCHECK_EQ(kind_hi, kDoubleHiVReg);
-  } else {
-    LOG(FATAL) << "Expected long or double: kind_lo=" << kind_lo << ", kind_hi=" << kind_hi;
-  }
-  if (cur_quick_frame_ != nullptr) {
-    DCHECK(context_ != nullptr);  // You can't reliably write registers without a context.
-    DCHECK(m == GetMethod());
-    if (cur_oat_quick_method_header_->IsOptimized()) {
-      return false;
-    } else {
-      return SetVRegPairFromQuickCode(m, vreg, new_value, kind_lo, kind_hi);
-    }
-  } else {
-    DCHECK(cur_shadow_frame_ != nullptr);
-    cur_shadow_frame_->SetVRegLong(vreg, new_value);
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromQuickCode(
-    ArtMethod* m, uint16_t vreg, uint64_t new_value, VRegKind kind_lo, VRegKind kind_hi) {
-  DCHECK_EQ(m, GetMethod());
-  const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
-  QuickMethodFrameInfo frame_info = method_header->GetFrameInfo();
-  const VmapTable vmap_table(method_header->GetVmapTable());
-  uint32_t vmap_offset_lo, vmap_offset_hi;
-  // TODO: IsInContext stops before spotting floating point registers.
-  if (vmap_table.IsInContext(vreg, kind_lo, &vmap_offset_lo) &&
-      vmap_table.IsInContext(vreg + 1, kind_hi, &vmap_offset_hi)) {
-    bool is_float = (kind_lo == kDoubleLoVReg);
-    uint32_t spill_mask = is_float ? frame_info.FpSpillMask() : frame_info.CoreSpillMask();
-    uint32_t reg_lo = vmap_table.ComputeRegister(spill_mask, vmap_offset_lo, kind_lo);
-    uint32_t reg_hi = vmap_table.ComputeRegister(spill_mask, vmap_offset_hi, kind_hi);
-    return SetRegisterPairIfAccessible(reg_lo, reg_hi, new_value, is_float);
-  } else {
-    const DexFile::CodeItem* code_item = m->GetCodeItem();
-    DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                      // its instructions?
-    uint32_t* addr = GetVRegAddrFromQuickCode(
-        cur_quick_frame_, code_item, frame_info.CoreSpillMask(),
-        frame_info.FpSpillMask(), frame_info.FrameSizeInBytes(), vreg);
-    *reinterpret_cast<uint64_t*>(addr) = new_value;
-    return true;
-  }
-}
-
-bool StackVisitor::SetVRegPairFromDebugger(ArtMethod* m,
-                                           uint16_t vreg,
-                                           uint64_t new_value,
-                                           VRegKind kind_lo,
-                                           VRegKind kind_hi) {
+bool StackVisitor::SetVRegPair(ArtMethod* m,
+                               uint16_t vreg,
+                               uint64_t new_value,
+                               VRegKind kind_lo,
+                               VRegKind kind_hi) {
   if (kind_lo == kLongLoVReg) {
     DCHECK_EQ(kind_hi, kLongHiVReg);
   } else if (kind_lo == kDoubleLoVReg) {
@@ -666,25 +547,6 @@
   return true;
 }
 
-bool StackVisitor::SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi,
-                                               uint64_t new_value, bool is_float) {
-  if (!IsAccessibleRegister(reg_lo, is_float) || !IsAccessibleRegister(reg_hi, is_float)) {
-    return false;
-  }
-  uintptr_t new_value_lo = static_cast<uintptr_t>(new_value & 0xFFFFFFFF);
-  uintptr_t new_value_hi = static_cast<uintptr_t>(new_value >> 32);
-  bool target64 = Is64BitInstructionSet(kRuntimeISA);
-  // Deal with 32 or 64-bit wide registers in a way that builds on all targets.
-  if (target64) {
-    DCHECK_EQ(reg_lo, reg_hi);
-    SetRegister(reg_lo, new_value, is_float);
-  } else {
-    SetRegister(reg_lo, new_value_lo, is_float);
-    SetRegister(reg_hi, new_value_hi, is_float);
-  }
-  return true;
-}
-
 bool StackVisitor::IsAccessibleGPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleGPR(reg);
@@ -702,12 +564,6 @@
   return context_->GetGPR(reg);
 }
 
-void StackVisitor::SetGPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetGPR(reg, value);
-}
-
 bool StackVisitor::IsAccessibleFPR(uint32_t reg) const {
   DCHECK(context_ != nullptr);
   return context_->IsAccessibleFPR(reg);
@@ -719,12 +575,6 @@
   return context_->GetFPR(reg);
 }
 
-void StackVisitor::SetFPR(uint32_t reg, uintptr_t value) {
-  DCHECK(cur_quick_frame_ != nullptr) << "This is a quick frame routine";
-  DCHECK(context_ != nullptr);
-  context_->SetFPR(reg, value);
-}
-
 uintptr_t StackVisitor::GetReturnPc() const {
   uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
   DCHECK(sp != nullptr);
@@ -865,8 +715,8 @@
   CHECK(code_start <= pc && pc <= (code_start + code_size))
       << PrettyMethod(method)
       << " pc=" << std::hex << pc
-      << " code=" << code
-      << " size=" << code_size;
+      << " code_start=" << code_start
+      << " code_size=" << code_size;
 }
 
 void StackVisitor::SanityCheckFrame() const {
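With the quick-frame write paths deleted, SetVReg()/SetVRegPair() only ever target
debugger shadow frames. On the read side, the GetVReg() change above makes
kReferenceVReg reads from a shadow frame go through GetVRegReference() instead of the
plain integer slot. A hedged illustration (visitor, method, and vreg are placeholders;
the narrowing cast relies on ART heap references fitting in 32 bits):

    uint32_t raw = 0;
    if (visitor.GetVReg(method, vreg, kReferenceVReg, &raw)) {
      // 'raw' came from GetVRegReference(), i.e. the actual reference slot, not a
      // reinterpretation of an integer register.
      mirror::Object* ref = reinterpret_cast<mirror::Object*>(raw);
    }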
diff --git a/runtime/stack.h b/runtime/stack.h
index aa7b616..a0c44cb 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -600,22 +600,18 @@
                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
+  // is triggered to make the values effective.
   bool SetVReg(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
   // is triggered to make the values effective.
-  bool SetVRegFromDebugger(ArtMethod* m, uint16_t vreg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPair(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                   VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  // Values will be set in debugger shadow frames. Debugger will make sure deoptimization
-  // is triggered to make the values effective.
-  bool SetVRegPairFromDebugger(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                               VRegKind kind_lo, VRegKind kind_hi)
+  bool SetVRegPair(ArtMethod* m,
+                   uint16_t vreg,
+                   uint64_t new_value,
+                   VRegKind kind_lo,
+                   VRegKind kind_hi)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   uintptr_t* GetGPRAddress(uint32_t reg) const;
@@ -749,22 +745,12 @@
     DCHECK(IsAccessibleRegister(reg, is_float));
     return is_float ? GetFPR(reg) : GetGPR(reg);
   }
-  void SetRegister(uint32_t reg, uintptr_t value, bool is_float) {
-    DCHECK(IsAccessibleRegister(reg, is_float));
-    if (is_float) {
-      SetFPR(reg, value);
-    } else {
-      SetGPR(reg, value);
-    }
-  }
 
   bool IsAccessibleGPR(uint32_t reg) const;
   uintptr_t GetGPR(uint32_t reg) const;
-  void SetGPR(uint32_t reg, uintptr_t value);
 
   bool IsAccessibleFPR(uint32_t reg) const;
   uintptr_t GetFPR(uint32_t reg) const;
-  void SetFPR(uint32_t reg, uintptr_t value);
 
   bool GetVRegFromDebuggerShadowFrame(uint16_t vreg, VRegKind kind, uint32_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -789,19 +775,6 @@
                                    uint64_t* val) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  bool SetVRegFromQuickCode(ArtMethod* m, uint16_t vreg, uint32_t new_value,
-                            VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterIfAccessible(uint32_t reg, uint32_t new_value, VRegKind kind)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
-  bool SetVRegPairFromQuickCode(ArtMethod* m, uint16_t vreg, uint64_t new_value,
-                                VRegKind kind_lo, VRegKind kind_hi)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  bool SetRegisterPairIfAccessible(uint32_t reg_lo, uint32_t reg_hi, uint64_t new_value,
-                                   bool is_float)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
   void SanityCheckFrame() const SHARED_REQUIRES(Locks::mutator_lock_);
 
   InlineInfo GetCurrentInlineInfo() const SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index b0cf418..63e6326 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1886,6 +1886,14 @@
   return result;
 }
 
+bool Thread::IsJWeakCleared(jweak obj) const {
+  CHECK(obj != nullptr);
+  IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
+  IndirectRefKind kind = GetIndirectRefKind(ref);
+  CHECK_EQ(kind, kWeakGlobal);
+  return tlsPtr_.jni_env->vm->IsWeakGlobalCleared(const_cast<Thread*>(this), ref);
+}
+
 // Implements java.lang.Thread.interrupted.
 bool Thread::Interrupted() {
   MutexLock mu(Thread::Current(), *wait_mutex_);
@@ -2509,6 +2517,7 @@
   QUICK_ENTRY_POINT_INFO(pNewStringFromStringBuilder)
   QUICK_ENTRY_POINT_INFO(pReadBarrierJni)
   QUICK_ENTRY_POINT_INFO(pReadBarrierSlow)
+  QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow)
 #undef QUICK_ENTRY_POINT_INFO
 
   os << offset;
diff --git a/runtime/thread.h b/runtime/thread.h
index 138c143..4624f27 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -445,6 +445,8 @@
 
   // Convert a jobject into a Object*
   mirror::Object* DecodeJObject(jobject obj) const SHARED_REQUIRES(Locks::mutator_lock_);
+  // Checks, without decoding it, whether the weak global ref has been cleared by the GC.
+  bool IsJWeakCleared(jweak obj) const SHARED_REQUIRES(Locks::mutator_lock_);
 
   mirror::Object* GetMonitorEnterObject() const SHARED_REQUIRES(Locks::mutator_lock_) {
     return tlsPtr_.monitor_enter_object;
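IsJWeakCleared() answers "did the GC clear this weak global?" without decoding the
reference, which avoids read barriers and any wait for weak-ref access to be re-enabled
during GC. A hedged usage sketch (obj, env, and soa are placeholders):

    jweak weak_ref = env->NewWeakGlobalRef(obj);
    // ... some time and a GC later:
    if (soa.Self()->IsJWeakCleared(weak_ref)) {
      // The referent was collected. The jweak slot itself still exists and must
      // eventually be released with DeleteWeakGlobalRef().
    }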
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index b09b87f..a390908 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -948,7 +948,12 @@
   Locks::mutator_lock_->ExclusiveLock(self);
   Locks::mutator_lock_->ExclusiveUnlock(self);
 #endif
-  AssertThreadsAreSuspended(self, self, debug_thread);
+  // Disabled for the following race condition:
+  // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock.
+  // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected).
+  // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable
+  // state (from SetStateUnsafe).
+  // AssertThreadsAreSuspended(self, self, debug_thread);
 
   VLOG(threads) << *self << " SuspendAllForDebugger complete";
 }
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 0527d3a..5a4dfb8 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -82,6 +82,11 @@
   }
 }
 
+void ThreadPool::RemoveAllTasks(Thread* self) {
+  MutexLock mu(self, task_queue_lock_);
+  tasks_.clear();
+}
+
 ThreadPool::ThreadPool(const char* name, size_t num_threads)
   : name_(name),
     task_queue_lock_("task queue lock"),
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index a2338d6..6cd4ad3 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -91,6 +91,9 @@
   // after running it, it is the caller's responsibility.
   void AddTask(Thread* self, Task* task) REQUIRES(!task_queue_lock_);
 
+  // Remove all tasks in the queue.
+  void RemoveAllTasks(Thread* self) REQUIRES(!task_queue_lock_);
+
   ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ab342aa..5815f7a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -331,7 +331,7 @@
   std::unique_ptr<File> trace_file;
   if (output_mode != TraceOutputMode::kDDMS) {
     if (trace_fd < 0) {
-      trace_file.reset(OS::CreateEmptyFile(trace_filename));
+      trace_file.reset(OS::CreateEmptyFileWriteOnly(trace_filename));
     } else {
       trace_file.reset(new File(trace_fd, "tracefile"));
       trace_file->DisableAutoClose();
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 10600e2..5a11698 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -23,28 +23,50 @@
 
 namespace art {
 
+// This is used only from the debugger and from test code.
 size_t CountModifiedUtf8Chars(const char* utf8) {
+  return CountModifiedUtf8Chars(utf8, strlen(utf8));
+}
+
+/*
+ * This does not validate UTF-8 rules (nor did the older code). But it gets the right
+ * answer for valid UTF-8, and that's fine because it's used only to size a buffer for
+ * a later conversion.
+ *
+ * Modified UTF-8 encodes Unicode code points of up to 21 bits as byte sequences, as follows:
+ * U+0001  - U+007F   0xxxxxxx
+ * U+0080  - U+07FF   110xxxxx 10xxxxxx
+ * U+0800  - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
+ * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * U+0000 is encoded using the two-byte form to avoid nulls inside strings (this differs
+ * from standard UTF-8).
+ * The four-byte encoding converts to two UTF-16 characters (a surrogate pair).
+ */
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
   size_t len = 0;
-  int ic;
-  while ((ic = *utf8++) != '\0') {
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
     len++;
-    if ((ic & 0x80) == 0) {
-      // one-byte encoding
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
       continue;
     }
-    // two- or three-byte encoding
+    // Two- or three-byte encoding.
     utf8++;
     if ((ic & 0x20) == 0) {
-      // two-byte encoding
+      // Two-byte encoding.
       continue;
     }
     utf8++;
     if ((ic & 0x10) == 0) {
-      // three-byte encoding
+      // Three-byte encoding.
       continue;
     }
 
-    // four-byte encoding: needs to be converted into a surrogate
+    // Four-byte encoding: needs to be converted into a surrogate
     // pair.
     utf8++;
     len++;
@@ -52,6 +74,7 @@
   return len;
 }
 
+// This is used only from the debugger and from test code.
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
   while (*utf8_data_in != '\0') {
     const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in);
@@ -65,13 +88,53 @@
   }
 }
 
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
+                                const char* utf8_data_in, size_t in_bytes) {
+  const char *in_start = utf8_data_in;
+  const char *in_end = utf8_data_in + in_bytes;
+  uint16_t *out_p = utf16_data_out;
+
+  if (LIKELY(out_chars == in_bytes)) {
+    // Common case where all characters are ASCII.
+    for (const char *p = in_start; p < in_end;) {
+      // Safe even if char is signed because ASCII characters always have
+      // the high bit cleared.
+      *out_p++ = dchecked_integral_cast<uint16_t>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  for (const char *p = in_start; p < in_end;) {
+    const uint32_t ch = GetUtf16FromUtf8(&p);
+    const uint16_t leading = GetLeadingUtf16Char(ch);
+    const uint16_t trailing = GetTrailingUtf16Char(ch);
+
+    *out_p++ = leading;
+    if (trailing != 0) {
+      *out_p++ = trailing;
+    }
+  }
+}
+
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t *utf16_end = utf16_in + char_count;
+    for (const uint16_t *p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = dchecked_integral_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
   while (char_count--) {
     const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
-      // char_count == 0 here implies we've encountered an unpaired
+      // A char_count of 0 here implies we've encountered an unpaired
       // surrogate and we have no choice but to encode it as 3-byte UTF
       // sequence. Note that unpaired surrogates can occur as a part of
       // "normal" operation.
@@ -161,34 +224,31 @@
 
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
-  while (char_count--) {
+  const uint16_t *end = chars + char_count;
+  while (chars < end) {
     const uint16_t ch = *chars++;
-    if (ch > 0 && ch <= 0x7f) {
-      ++result;
-    } else if (ch >= 0xd800 && ch <= 0xdbff) {
-      if (char_count > 0) {
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
         const uint16_t ch2 = *chars;
         // If we find a properly paired surrogate, we emit it as a 4 byte
         // UTF sequence. If we find an unpaired leading or trailing surrogate,
         // we emit it as a 3 byte sequence like we would have done earlier.
-        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
           chars++;
-          char_count--;
-
           result += 4;
-        } else {
-          result += 3;
+          continue;
         }
-      } else {
-        // This implies we found an unpaired trailing surrogate at the end
-        // of a string.
-        result += 3;
       }
-    } else if (ch > 0x7ff) {
-      result += 3;
-    } else {
-      result += 2;
     }
+    result += 3;
   }
   return result;
 }
diff --git a/runtime/utf.h b/runtime/utf.h
index 1193d29..03158c4 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -40,6 +40,7 @@
  * Returns the number of UTF-16 characters in the given modified UTF-8 string.
  */
 size_t CountModifiedUtf8Chars(const char* utf8);
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count);
 
 /*
  * Returns the number of modified UTF-8 bytes needed to represent the given
@@ -51,6 +52,8 @@
  * Convert from Modified UTF-8 to UTF-16.
  */
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in);
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars,
+                                const char* utf8_in, size_t in_bytes);
 
 /*
  * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner
@@ -71,7 +74,8 @@
  * this anyway, so if you want a NUL-terminated string, you know where to
  * put the NUL byte.
  */
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count);
 
 /*
  * The java.lang.String hashCode() algorithm.
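The new length-taking overloads let callers handle data with embedded NULs and size
buffers exactly. A round-trip sketch with the counts worked out by hand (the extra byte
in the UTF-8 buffer keeps it NUL-terminated, which the strlen-based DCHECK in
CountModifiedUtf8Chars() expects):

    const uint16_t utf16[] = { 'h', 0xd801, 0xdc00, 'i' };   // Includes a surrogate pair.
    size_t char_count = sizeof(utf16) / sizeof(utf16[0]);    // 4 UTF-16 code units.
    size_t byte_count = CountUtf8Bytes(utf16, char_count);   // 1 + 4 + 1 = 6 bytes.
    std::vector<char> utf8(byte_count + 1, '\0');
    ConvertUtf16ToModifiedUtf8(utf8.data(), byte_count, utf16, char_count);
    // And back: the 6 bytes decode to 4 UTF-16 code units again.
    std::vector<uint16_t> out(CountModifiedUtf8Chars(utf8.data(), byte_count));
    ConvertModifiedUtf8ToUtf16(out.data(), out.size(), utf8.data(), byte_count);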
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 94a6ea5..5239e40 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,7 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <map>
 #include <vector>
 
 namespace art {
@@ -48,7 +49,7 @@
 };
 
 // A test string that contains a UTF-8 encoding of a surrogate pair
-// (code point = U+10400)
+// (code point = U+10400).
 static const uint8_t kSurrogateEncoding[] = {
     0xed, 0xa0, 0x81,
     0xed, 0xb0, 0x80,
@@ -66,13 +67,13 @@
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(1, ptr, start);
 
-  // Two byte sequence
+  // Two byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(3, ptr, start);
 
-  // Three byte sequence
+  // Three byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -84,7 +85,7 @@
   EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(10, ptr, start);
 
-  // Null terminator
+  // Null terminator.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -117,7 +118,8 @@
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
-  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(),
+                             &input[0], input.size());
   EXPECT_EQ(expected, output);
 }
 
@@ -139,10 +141,10 @@
   AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
 
   AssertConversion({
-      0xd802, 0xdc02,  // Surrogate pair
-      0xdef0, 0xdcff,  // Three byte encodings
-      0x0101, 0x0000,  // Two byte encodings
-      'p'   , 'p'      // One byte encoding
+      0xd802, 0xdc02,  // Surrogate pair.
+      0xdef0, 0xdcff,  // Three byte encodings.
+      0x0101, 0x0000,  // Two byte encodings.
+      'p'   , 'p'      // One byte encoding.
     }, {
       0xf0, 0x90, 0xa0, 0x82,
       0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
@@ -155,9 +157,225 @@
   // Unpaired trailing surrogate at the end of input.
   AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
   // Unpaired (or incorrectly paired) surrogates in the middle of the input.
-  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes {
+      {{ 'h' }, { 'h' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x81 }, { 0xc2, 0x81 }},
+      {{ 0x801 }, { 0xe0, 0xa0, 0x81 }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes {
+      {{ 'e' }, { 'e' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x7ff }, { 0xdf, 0xbf }},
+      {{ 0xffff }, { 0xef, 0xbf, 0xbf }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests {
+      {{ 0xd801 }, { 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }},
+      {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }},
+  };
+  for (const auto& prefix : prefixes) {
+    const std::vector<uint16_t>& prefix_in = prefix.first;
+    const std::vector<uint8_t>& prefix_out = prefix.second;
+    for (const auto& test : tests) {
+      const std::vector<uint16_t>& test_in = test.first;
+      const std::vector<uint8_t>& test_out = test.second;
+      for (const auto& suffix : suffixes) {
+        const std::vector<uint16_t>& suffix_in = suffix.first;
+        const std::vector<uint8_t>& suffix_out = suffix.second;
+        std::vector<uint16_t> in = prefix_in;
+        in.insert(in.end(), test_in.begin(), test_in.end());
+        in.insert(in.end(), suffix_in.begin(), suffix_in.end());
+        std::vector<uint8_t> out = prefix_out;
+        out.insert(out.end(), test_out.begin(), test_out.end());
+        out.insert(out.end(), suffix_out.begin(), suffix_out.end());
+        AssertConversion(in, out);
+      }
+    }
+  }
+}
+
+// Old versions of the functions, kept here to compare answers with the optimized versions.
+
+size_t CountModifiedUtf8Chars_reference(const char* utf8) {
+  size_t len = 0;
+  int ic;
+  while ((ic = *utf8++) != '\0') {
+    len++;
+    if ((ic & 0x80) == 0) {
+      // one-byte encoding
+      continue;
+    }
+    // two- or three-byte encoding
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // two-byte encoding
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // three-byte encoding
+      continue;
+    }
+
+    // four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  while (char_count--) {
+    const uint16_t ch = *chars++;
+    if (ch > 0 && ch <= 0x7f) {
+      ++result;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence like we would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
+      } else {
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
+      }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
+    }
+  }
+  return result;
+}
+
+static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in,
+                                                 size_t char_count) {
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // A char_count of 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as a part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again.
+
+static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) {
+  first = (code_point >> 10) + 0xd7c0;
+  second = (code_point & 0x03ff) + 0xdc00;
+}
+
+static void testConversions(uint16_t *buf, int char_count) {
+  char bytes_test[8], bytes_reference[8];
+  uint16_t out_buf_test[4], out_buf_reference[4];
+  int byte_count_test, byte_count_reference;
+  int char_count_test, char_count_reference;
+
+  // Calculate the number of UTF-8 bytes for the UTF-16 chars.
+  byte_count_reference = CountUtf8Bytes_reference(buf, char_count);
+  byte_count_test = CountUtf8Bytes(buf, char_count);
+  EXPECT_EQ(byte_count_reference, byte_count_test);
+
+  // Convert the UTF-16 string to UTF-8 bytes.
+  ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+  ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
+  for (int i = 0; i < byte_count_test; ++i) {
+    EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+  }
+
+  // Calculate the number of UTF-16 chars from the UTF-8 bytes.
+  bytes_reference[byte_count_reference] = 0;  // Reference function needs null termination.
+  char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+  char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+  EXPECT_EQ(char_count, char_count_reference);
+  EXPECT_EQ(char_count, char_count_test);
+
+  // Convert the UTF-8 bytes back to UTF-16 chars.
+  // This does not need a copied _reference version, because the original function
+  // with the old API is retained for debug/test code.
+  ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+  ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
+  for (int i = 0; i < char_count_test; ++i) {
+    EXPECT_EQ(buf[i], out_buf_reference[i]);
+    EXPECT_EQ(buf[i], out_buf_test[i]);
+  }
+}
+
+TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) {
+  for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) {
+    uint16_t buf[4];
+    if (codePoint <= 0xffff) {
+      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
+        // According to the Unicode standard, no character will ever
+        // be assigned to these code points, and they cannot be encoded
+        // into either UTF-16 or UTF-8.
+        continue;
+      }
+      buf[0] = 'h';
+      buf[1] = codePoint;
+      buf[2] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+    } else {
+      buf[0] = 'h';
+      codePointToSurrogatePair(codePoint, buf[1], buf[2]);
+      buf[3] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf, 4);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+      testConversions(buf + 1, 3);
+    }
+  }
 }
 
 }  // namespace art
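The constants in codePointToSurrogatePair() are the standard surrogate formulas with the
0x10000 bias folded in: lead = 0xd800 + ((cp - 0x10000) >> 10), which equals
(cp >> 10) + 0xd7c0, and trail = 0xdc00 + ((cp - 0x10000) & 0x3ff), which equals
(cp & 0x3ff) + 0xdc00 because subtracting 0x10000 leaves the low ten bits untouched.
A compile-time spot check against the U+10400 pair used earlier in this file:

    static_assert((0x10400 >> 10) + 0xd7c0 == 0xd801, "lead surrogate of U+10400");
    static_assert((0x10400 & 0x03ff) + 0xdc00 == 0xdc00, "trail surrogate of U+10400");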
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 48dce63..68db7e3 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1434,7 +1434,8 @@
     execv(program, &args[0]);
 
     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-    exit(1);
+    // Use _exit to avoid running atexit handlers in the child.
+    _exit(1);
   } else {
     if (pid == -1) {
       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
diff --git a/runtime/utils.h b/runtime/utils.h
index 3690f86..8b7941a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -18,9 +18,11 @@
 #define ART_RUNTIME_UTILS_H_
 
 #include <pthread.h>
+#include <stdlib.h>
 
 #include <limits>
 #include <memory>
+#include <random>
 #include <string>
 #include <type_traits>
 #include <vector>
@@ -350,6 +352,26 @@
                  double* parsed_value,
                  UsageFn Usage);
 
+#if defined(__BIONIC__)
+struct Arc4RandomGenerator {
+  typedef uint32_t result_type;
+  static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
+  static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }
+  uint32_t operator() () { return arc4random(); }
+};
+using RNG = Arc4RandomGenerator;
+#else
+using RNG = std::random_device;
+#endif
+
+template <typename T>
+T GetRandomNumber(T min, T max) {
+  CHECK_LT(min, max);
+  std::uniform_int_distribution<T> dist(min, max);
+  RNG rng;
+  return dist(rng);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
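GetRandomNumber() constructs a fresh generator per call: arc4random() needs no seeding
on Bionic, and std::random_device draws from the OS entropy source elsewhere, so there
is no seed state to manage (at the cost, on some C++ runtimes, of reopening the entropy
device on each call). The distribution is inclusive at both ends. A sketch:

    // Uniformly random value in [0, 0x1000]; the bounds are illustrative.
    uint32_t delta = GetRandomNumber<uint32_t>(0u, 0x1000u);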
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2db79ab..364b8ce 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -52,7 +52,7 @@
 namespace verifier {
 
 static constexpr bool kTimeVerifyMethod = !kIsDebugBuild;
-static constexpr bool gDebugVerify = false;
+static constexpr bool kDebugVerify = false;
 // TODO: Add a constant to method_verifier to turn on verbose logging?
 
 // On VLOG(verifier), should we dump the whole state when we run into a hard failure?
@@ -114,21 +114,10 @@
   reg_line->MarkAllRegistersAsConflicts(verifier);
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyMethod(
-    ArtMethod* method, bool allow_soft_failures, std::string* error ATTRIBUTE_UNUSED) {
-  StackHandleScope<2> hs(Thread::Current());
-  mirror::Class* klass = method->GetDeclaringClass();
-  auto h_dex_cache(hs.NewHandle(klass->GetDexCache()));
-  auto h_class_loader(hs.NewHandle(klass->GetClassLoader()));
-  return VerifyMethod(hs.Self(), method->GetDexMethodIndex(), method->GetDexFile(), h_dex_cache,
-                      h_class_loader, klass->GetClassDef(), method->GetCodeItem(), method,
-                      method->GetAccessFlags(), allow_soft_failures, false);
-}
-
-
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
                                                         mirror::Class* klass,
                                                         bool allow_soft_failures,
+                                                        bool log_hard_failures,
                                                         std::string* error) {
   if (klass->IsVerified()) {
     return kNoFailure;
@@ -160,8 +149,91 @@
   StackHandleScope<2> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
-  return VerifyClass(
-      self, &dex_file, dex_cache, class_loader, class_def, allow_soft_failures, error);
+  return VerifyClass(self,
+                     &dex_file,
+                     dex_cache,
+                     class_loader,
+                     class_def,
+                     allow_soft_failures,
+                     log_hard_failures,
+                     error);
+}
+
+template <bool kDirect>
+static bool HasNextMethod(ClassDataItemIterator* it) {
+  return kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod();
+}
+
+template <bool kDirect>
+void MethodVerifier::VerifyMethods(Thread* self,
+                                   ClassLinker* linker,
+                                   const DexFile* dex_file,
+                                   const DexFile::ClassDef* class_def,
+                                   ClassDataItemIterator* it,
+                                   Handle<mirror::DexCache> dex_cache,
+                                   Handle<mirror::ClassLoader> class_loader,
+                                   bool allow_soft_failures,
+                                   bool log_hard_failures,
+                                   bool need_precise_constants,
+                                   bool* hard_fail,
+                                   size_t* error_count,
+                                   std::string* error_string) {
+  DCHECK(it != nullptr);
+
+  int64_t previous_method_idx = -1;
+  while (HasNextMethod<kDirect>(it)) {
+    self->AllowThreadSuspension();
+    uint32_t method_idx = it->GetMemberIndex();
+    if (method_idx == previous_method_idx) {
+      // smali can create dex files with two encoded_methods sharing the same method_idx
+      // http://code.google.com/p/smali/issues/detail?id=119
+      it->Next();
+      continue;
+    }
+    previous_method_idx = method_idx;
+    InvokeType type = it->GetMethodInvokeType(*class_def);
+    ArtMethod* method = linker->ResolveMethod(
+        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
+    if (method == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      // We couldn't resolve the method, but continue regardless.
+      self->ClearException();
+    } else {
+      DCHECK(method->GetDeclaringClassUnchecked() != nullptr) << type;
+    }
+    StackHandleScope<1> hs(self);
+    std::string hard_failure_msg;
+    MethodVerifier::FailureKind result = VerifyMethod(self,
+                                                      method_idx,
+                                                      dex_file,
+                                                      dex_cache,
+                                                      class_loader,
+                                                      class_def,
+                                                      it->GetMethodCodeItem(),
+                                                      method,
+                                                      it->GetMethodAccessFlags(),
+                                                      allow_soft_failures,
+                                                      log_hard_failures,
+                                                      need_precise_constants,
+                                                      &hard_failure_msg);
+    if (result != kNoFailure) {
+      if (result == kHardFailure) {
+        if (*error_count > 0) {
+          *error_string += "\n";
+        }
+        if (!*hard_fail) {
+          *error_string += "Verifier rejected class ";
+          *error_string += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
+          *error_string += ":";
+        }
+        *error_string += " ";
+        *error_string += hard_failure_msg;
+        *hard_fail = true;
+      }
+      *error_count = *error_count + 1;
+    }
+    it->Next();
+  }
 }
 
 MethodVerifier::FailureKind MethodVerifier::VerifyClass(Thread* self,
@@ -170,6 +242,7 @@
                                                         Handle<mirror::ClassLoader> class_loader,
                                                         const DexFile::ClassDef* class_def,
                                                         bool allow_soft_failures,
+                                                        bool log_hard_failures,
                                                         std::string* error) {
   DCHECK(class_def != nullptr);
 
@@ -193,94 +266,35 @@
   size_t error_count = 0;
   bool hard_fail = false;
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  int64_t previous_direct_method_idx = -1;
-  while (it.HasNextDirectMethod()) {
-    self->AllowThreadSuspension();
-    uint32_t method_idx = it.GetMemberIndex();
-    if (method_idx == previous_direct_method_idx) {
-      // smali can create dex files with two encoded_methods sharing the same method_idx
-      // http://code.google.com/p/smali/issues/detail?id=119
-      it.Next();
-      continue;
-    }
-    previous_direct_method_idx = method_idx;
-    InvokeType type = it.GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
-    if (method == nullptr) {
-      DCHECK(self->IsExceptionPending());
-      // We couldn't resolve the method, but continue regardless.
-      self->ClearException();
-    } else {
-      DCHECK(method->GetDeclaringClassUnchecked() != nullptr) << type;
-    }
-    StackHandleScope<1> hs(self);
-    MethodVerifier::FailureKind result = VerifyMethod(self,
-                                                      method_idx,
-                                                      dex_file,
-                                                      dex_cache,
-                                                      class_loader,
-                                                      class_def,
-                                                      it.GetMethodCodeItem(),
-        method, it.GetMethodAccessFlags(), allow_soft_failures, false);
-    if (result != kNoFailure) {
-      if (result == kHardFailure) {
-        hard_fail = true;
-        if (error_count > 0) {
-          *error += "\n";
-        }
-        *error = "Verifier rejected class ";
-        *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
-        *error += " due to bad method ";
-        *error += PrettyMethod(method_idx, *dex_file);
-      }
-      ++error_count;
-    }
-    it.Next();
-  }
-  int64_t previous_virtual_method_idx = -1;
-  while (it.HasNextVirtualMethod()) {
-    self->AllowThreadSuspension();
-    uint32_t method_idx = it.GetMemberIndex();
-    if (method_idx == previous_virtual_method_idx) {
-      // smali can create dex files with two encoded_methods sharing the same method_idx
-      // http://code.google.com/p/smali/issues/detail?id=119
-      it.Next();
-      continue;
-    }
-    previous_virtual_method_idx = method_idx;
-    InvokeType type = it.GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
-    if (method == nullptr) {
-      DCHECK(self->IsExceptionPending());
-      // We couldn't resolve the method, but continue regardless.
-      self->ClearException();
-    }
-    StackHandleScope<1> hs(self);
-    MethodVerifier::FailureKind result = VerifyMethod(self,
-                                                      method_idx,
-                                                      dex_file,
-                                                      dex_cache,
-                                                      class_loader,
-                                                      class_def,
-                                                      it.GetMethodCodeItem(),
-        method, it.GetMethodAccessFlags(), allow_soft_failures, false);
-    if (result != kNoFailure) {
-      if (result == kHardFailure) {
-        hard_fail = true;
-        if (error_count > 0) {
-          *error += "\n";
-        }
-        *error = "Verifier rejected class ";
-        *error += PrettyDescriptor(dex_file->GetClassDescriptor(*class_def));
-        *error += " due to bad method ";
-        *error += PrettyMethod(method_idx, *dex_file);
-      }
-      ++error_count;
-    }
-    it.Next();
-  }
+  // Direct methods.
+  VerifyMethods<true>(self,
+                      linker,
+                      dex_file,
+                      class_def,
+                      &it,
+                      dex_cache,
+                      class_loader,
+                      allow_soft_failures,
+                      log_hard_failures,
+                      false /* need precise constants */,
+                      &hard_fail,
+                      &error_count,
+                      error);
+  // Virtual methods.
+  VerifyMethods<false>(self,
+                       linker,
+                       dex_file,
+                       class_def,
+                       &it,
+                       dex_cache,
+                       class_loader,
+                       allow_soft_failures,
+                       log_hard_failures,
+                       false /* need precise constants */,
+                       &hard_fail,
+                       &error_count,
+                       error);
+
   if (error_count == 0) {
     return kNoFailure;
   } else {
@@ -299,7 +313,8 @@
   return registers_size * insns_size > 4*1024*1024;
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self, uint32_t method_idx,
+MethodVerifier::FailureKind MethodVerifier::VerifyMethod(Thread* self,
+                                                         uint32_t method_idx,
                                                          const DexFile* dex_file,
                                                          Handle<mirror::DexCache> dex_cache,
                                                          Handle<mirror::ClassLoader> class_loader,
@@ -308,7 +323,9 @@
                                                          ArtMethod* method,
                                                          uint32_t method_access_flags,
                                                          bool allow_soft_failures,
-                                                         bool need_precise_constants) {
+                                                         bool log_hard_failures,
+                                                         bool need_precise_constants,
+                                                         std::string* hard_failure_msg) {
   MethodVerifier::FailureKind result = kNoFailure;
   uint64_t start_ns = kTimeVerifyMethod ? NanoTime() : 0;
 
@@ -321,8 +338,8 @@
     CHECK(!verifier.have_pending_hard_failure_);
     if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
-          verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
-                                << PrettyMethod(method_idx, *dex_file) << "\n");
+        verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
+                                                    << PrettyMethod(method_idx, *dex_file) << "\n");
       }
       result = kSoftFailure;
     }
@@ -336,11 +353,18 @@
       result = kSoftFailure;
     } else {
       CHECK(verifier.have_pending_hard_failure_);
-      verifier.DumpFailures(LOG(INFO) << "Verification error in "
-                                      << PrettyMethod(method_idx, *dex_file) << "\n");
+      if (VLOG_IS_ON(verifier) || log_hard_failures) {
+        verifier.DumpFailures(LOG(INFO) << "Verification error in "
+                                        << PrettyMethod(method_idx, *dex_file) << "\n");
+      }
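+      // Hand the last failure message back to the caller, if one was requested.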
+      if (hard_failure_msg != nullptr) {
+        CHECK(!verifier.failure_messages_.empty());
+        *hard_failure_msg =
+            verifier.failure_messages_[verifier.failure_messages_.size() - 1]->str();
+      }
       result = kHardFailure;
     }
-    if (gDebugVerify) {
+    if (kDebugVerify) {
       std::cout << "\n" << verifier.info_messages_.str();
       verifier.Dump(std::cout);
     }
@@ -1741,7 +1765,7 @@
     GetInstructionFlags(insn_idx).ClearChanged();
   }
 
-  if (gDebugVerify) {
+  if (kDebugVerify) {
     /*
      * Scan for dead code. There's nothing "evil" about dead code
      * (besides the wasted space), but it indicates a flaw somewhere
@@ -1874,7 +1898,7 @@
 
   int32_t branch_target = 0;
   bool just_set_result = false;
-  if (gDebugVerify) {
+  if (kDebugVerify) {
     // Generate processing back trace to debug verifier
     LogVerifyInfo() << "Processing " << inst->DumpString(dex_file_) << "\n"
                     << work_line_->Dump(this) << "\n";
@@ -2045,6 +2069,10 @@
           } else if (reg_type.IsUninitializedTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "returning uninitialized object '"
                                               << reg_type << "'";
+          } else if (!reg_type.IsReferenceTypes()) {
+            // We really do expect a reference here.
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-object returns a non-reference type "
+                                              << reg_type;
           } else if (!return_type.IsAssignableFrom(reg_type)) {
             if (reg_type.IsUnresolvedTypes() || return_type.IsUnresolvedTypes()) {
               Fail(VERIFY_ERROR_NO_CLASS) << " can't resolve returned type '" << return_type
@@ -4659,7 +4687,7 @@
     }
   } else {
     ArenaUniquePtr<RegisterLine> copy;
-    if (gDebugVerify) {
+    if (kDebugVerify) {
       copy.reset(RegisterLine::Create(target_line->NumRegs(), this));
       copy->CopyFromLine(target_line);
     }
@@ -4667,7 +4695,7 @@
     if (have_pending_hard_failure_) {
       return false;
     }
-    if (gDebugVerify && changed) {
+    if (kDebugVerify && changed) {
       LogVerifyInfo() << "Merging at [" << reinterpret_cast<void*>(work_insn_idx_) << "]"
                       << " to [" << reinterpret_cast<void*>(next_insn) << "]: " << "\n"
                       << copy->Dump(this) << "  MERGE\n"
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 7b51d6e..719f0d7 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -139,14 +139,20 @@
   };
 
   /* Verify a class. Returns "kNoFailure" on success. */
-  static FailureKind VerifyClass(Thread* self, mirror::Class* klass, bool allow_soft_failures,
+  static FailureKind VerifyClass(Thread* self,
+                                 mirror::Class* klass,
+                                 bool allow_soft_failures,
+                                 bool log_hard_failures,
                                  std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  static FailureKind VerifyClass(Thread* self, const DexFile* dex_file,
+  static FailureKind VerifyClass(Thread* self,
+                                 const DexFile* dex_file,
                                  Handle<mirror::DexCache> dex_cache,
                                  Handle<mirror::ClassLoader> class_loader,
                                  const DexFile::ClassDef* class_def,
-                                 bool allow_soft_failures, std::string* error)
+                                 bool allow_soft_failures,
+                                 bool log_hard_failures,
+                                 std::string* error)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static MethodVerifier* VerifyMethodAndDump(Thread* self,
@@ -160,9 +166,6 @@
                                              uint32_t method_access_flags)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static FailureKind VerifyMethod(ArtMethod* method, bool allow_soft_failures,
-                                  std::string* error) SHARED_REQUIRES(Locks::mutator_lock_);
-
   uint8_t EncodePcToReferenceMapData() const;
 
   uint32_t DexFileVersion() const {
@@ -310,6 +313,24 @@
   // Adds the given string to the end of the last failure message.
   void AppendToLastFailMessage(std::string);
 
+  // Verify all direct or virtual methods of a class. The method assumes that the iterator is
+  // positioned correctly, and the iterator will be updated.
+  template <bool kDirect>
+  static void VerifyMethods(Thread* self,
+                            ClassLinker* linker,
+                            const DexFile* dex_file,
+                            const DexFile::ClassDef* class_def,
+                            ClassDataItemIterator* it,
+                            Handle<mirror::DexCache> dex_cache,
+                            Handle<mirror::ClassLoader> class_loader,
+                            bool allow_soft_failures,
+                            bool log_hard_failures,
+                            bool need_precise_constants,
+                            bool* hard_fail,
+                            size_t* error_count,
+                            std::string* error_string)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   /*
    * Perform verification on a single method.
    *
@@ -321,13 +342,18 @@
    *  (3) Iterate through the method, checking type safety and looking
    *      for code flow problems.
    */
-  static FailureKind VerifyMethod(Thread* self, uint32_t method_idx, const DexFile* dex_file,
+  static FailureKind VerifyMethod(Thread* self,
+                                  uint32_t method_idx,
+                                  const DexFile* dex_file,
                                   Handle<mirror::DexCache> dex_cache,
                                   Handle<mirror::ClassLoader> class_loader,
                                   const DexFile::ClassDef* class_def_idx,
                                   const DexFile::CodeItem* code_item,
-                                  ArtMethod* method, uint32_t method_access_flags,
-                                  bool allow_soft_failures, bool need_precise_constants)
+                                  ArtMethod* method,
+                                  uint32_t method_access_flags,
+                                  bool allow_soft_failures,
+                                  bool log_hard_failures,
+                                  bool need_precise_constants,
+                                  std::string* hard_failure_msg)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void FindLocksAtDexPc() SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 2ab6b4a..c4123d5 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -37,7 +37,7 @@
 
     // Verify the class
     std::string error_msg;
-    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, true, &error_msg) == MethodVerifier::kNoFailure)
+    ASSERT_TRUE(MethodVerifier::VerifyClass(self, klass, true, true, &error_msg) == MethodVerifier::kNoFailure)
         << error_msg;
   }
 
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 3f00a1a..057b351 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -21,6 +21,8 @@
 
 # android.test.anno.MissingAnnotation is available at compile time...
 ${JAVAC} -d classes `find src -name '*.java'`
+# Overwrite RenamedEnumClass with the src2 version, which renames the enum constants.
+${JAVAC} -d classes `find src2 -name '*.java'`
 
 # ...but not at run time.
 rm 'classes/android/test/anno/MissingAnnotation.class'
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index e1c3dad..180adf8 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -108,3 +108,4 @@
 
 Get annotation with missing class should not throw
 Got expected TypeNotPresentException
+Got expected NoSuchFieldError
diff --git a/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
new file mode 100644
index 0000000..7a15c64
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface AnnoRenamedEnumMethod {
+    RenamedEnumClass.RenamedEnum renamed() default RenamedEnumClass.RenamedEnum.FOO;
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..cfba819
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOO, BAR };
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedNoted.java b/test/005-annotations/src/android/test/anno/RenamedNoted.java
new file mode 100644
index 0000000..aae3a3f
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedNoted.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+public class RenamedNoted {
+    @AnnoRenamedEnumMethod(renamed=RenamedEnumClass.RenamedEnum.BAR)
+    public int bar() {
+        return 0;
+    }
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 7b74a73..2f0a8d3 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -1,3 +1,19 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package android.test.anno;
 
 import java.lang.annotation.Annotation;
@@ -199,5 +215,15 @@
         } catch (TypeNotPresentException expected) {
             System.out.println("Got expected TypeNotPresentException");
         }
+
+        // Test renamed enums: reading an annotation whose enum constant was
+        // renamed at run time should throw NoSuchFieldError.
+        try {
+            for (Method m: RenamedNoted.class.getDeclaredMethods()) {
+                Annotation[] annos = m.getDeclaredAnnotations();
+                System.out.println("  annotations on METH " + m + ":");
+            }
+        } catch (NoSuchFieldError expected) {
+            System.out.println("Got expected NoSuchFieldError");
+        }
     }
 }
diff --git a/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..5a2fe36
--- /dev/null
+++ b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOOBAR };
+}
diff --git a/test/008-exceptions/expected.txt b/test/008-exceptions/expected.txt
index 92c79dc..083ecf7 100644
--- a/test/008-exceptions/expected.txt
+++ b/test/008-exceptions/expected.txt
@@ -1,12 +1,17 @@
 Got an NPE: second throw
 java.lang.NullPointerException: second throw
-	at Main.catchAndRethrow(Main.java:58)
-	at Main.exceptions_007(Main.java:41)
-	at Main.main(Main.java:49)
+	at Main.catchAndRethrow(Main.java:77)
+	at Main.exceptions_007(Main.java:59)
+	at Main.main(Main.java:67)
 Caused by: java.lang.NullPointerException: first throw
-	at Main.throwNullPointerException(Main.java:65)
-	at Main.catchAndRethrow(Main.java:55)
+	at Main.throwNullPointerException(Main.java:84)
+	at Main.catchAndRethrow(Main.java:74)
 	... 2 more
 Static Init
-BadError: This is bad by convention
-BadError: This is bad by convention
+BadError: This is bad by convention: BadInit
+java.lang.NoClassDefFoundError: BadInit
+BadError: This is bad by convention: BadInit
+Static BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
+java.lang.NoClassDefFoundError: BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 7f6d0c5..9e3477a 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -14,20 +14,38 @@
  * limitations under the License.
  */
 
-// An exception that doesn't have a <init>(String) method.
+// An error class with a <init>(String) constructor.
 class BadError extends Error {
-    public BadError() {
-        super("This is bad by convention");
+    public BadError(String s) {
+        super("This is bad by convention: " + s);
     }
 }
 
-// A class that throws BadException during static initialization.
+// A class that throws BadError during static initialization.
 class BadInit {
     static int dummy;
     static {
         System.out.println("Static Init");
         if (true) {
-            throw new BadError();
+            throw new BadError("BadInit");
+        }
+    }
+}
+
+// An error that doesn't have a <init>(String) method.
+class BadErrorNoStringInit extends Error {
+    public BadErrorNoStringInit() {
+        super("This is bad by convention");
+    }
+}
+
+// A class that throws BadErrorNoStringInit during static initialization.
+class BadInitNoStringInit {
+    static int dummy;
+    static {
+        System.out.println("Static BadInitNoStringInit");
+        if (true) {
+            throw new BadErrorNoStringInit();
         }
     }
 }
@@ -48,6 +66,7 @@
     public static void main (String args[]) {
         exceptions_007();
         exceptionsRethrowClassInitFailure();
+        exceptionsRethrowClassInitFailureNoStringInit();
     }
 
     private static void catchAndRethrow() {
@@ -79,8 +98,32 @@
             try {
                 BadInit.dummy = 1;
                 throw new IllegalStateException("Should not reach here.");
-            } catch (BadError e) {
+            } catch (NoClassDefFoundError e) {
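+                // The class is already marked erroneous, so this attempt throws
+                // NoClassDefFoundError with the original BadError as its cause.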
                 System.out.println(e);
+                System.out.println(e.getCause());
+            }
+        } catch (Exception error) {
+            error.printStackTrace();
+        }
+    }
+
+    private static void exceptionsRethrowClassInitFailureNoStringInit() {
+        try {
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (BadErrorNoStringInit e) {
+                System.out.println(e);
+            }
+
+            // Check that a second initialization attempt also fails, this time
+            // with NoClassDefFoundError carrying the original error as its cause.
+
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (NoClassDefFoundError e) {
+                System.out.println(e);
+                System.out.println(e.getCause());
             }
         } catch (Exception error) {
             error.printStackTrace();
diff --git a/test/061-out-of-memory/expected.txt b/test/061-out-of-memory/expected.txt
index ca87629..c31980c 100644
--- a/test/061-out-of-memory/expected.txt
+++ b/test/061-out-of-memory/expected.txt
@@ -4,4 +4,5 @@
 testOomeLarge succeeded
 testOomeSmall beginning
 testOomeSmall succeeded
+Got expected toCharArray OOM
 tests succeeded
diff --git a/test/061-out-of-memory/src/Main.java b/test/061-out-of-memory/src/Main.java
index c812c81..bda978e 100644
--- a/test/061-out-of-memory/src/Main.java
+++ b/test/061-out-of-memory/src/Main.java
@@ -26,6 +26,7 @@
         testHugeArray();
         testOomeLarge();
         testOomeSmall();
+        testOomeToCharArray();
         System.out.println("tests succeeded");
     }
 
@@ -106,4 +107,21 @@
         }
         System.out.println("testOomeSmall succeeded");
     }
+
+    private static void testOomeToCharArray() {
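+        // First exhaust the heap with large char arrays, then check that
+        // String.toCharArray() under memory pressure throws OutOfMemoryError
+        // instead of crashing the runtime.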
+        Object[] o = new Object[2000000];
+        String test = "test";
+        int i = 0;
+        try {
+            for (; i < o.length; ++i) o[i] = new char[1000000];
+        } catch (OutOfMemoryError oom) {}
+        try {
+            for (; i < o.length; ++i) {
+                o[i] = test.toCharArray();
+            }
+        } catch (OutOfMemoryError oom) {
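+            // Release the array first so the println below cannot itself run out
+            // of memory.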
+            o = null;
+            System.out.println("Got expected toCharArray OOM");
+        }
+    }
 }
diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc
index 3e533ad..b6b1c43 100644
--- a/test/117-nopatchoat/nopatchoat.cc
+++ b/test/117-nopatchoat/nopatchoat.cc
@@ -35,7 +35,7 @@
   }
 
   static bool isRelocationDeltaZero() {
-    gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetImageSpace();
+    gc::space::ImageSpace* space = Runtime::Current()->GetHeap()->GetBootImageSpace();
     return space != nullptr && space->GetImageHeader().GetPatchDelta() == 0;
   }
 
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 78f8842..7762b2d 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -92,7 +92,7 @@
 // detecting this.
 #if __linux__
 static bool IsPicImage() {
-  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+  gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace();
   CHECK(image_space != nullptr);  // We should be running with an image.
   const OatFile* oat_file = image_space->GetOatFile();
   CHECK(oat_file != nullptr);     // We should have an oat file to go with the image.
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 96302fb..6d6a4f2 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -19,7 +19,7 @@
   /// CHECK-START: void Main.InlineVoid() inliner (before)
   /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
   /// CHECK-DAG:                      InvokeStaticOrDirect
-  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>,{{[ij]\d+}}]
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>{{(,[ij]\d+)?}}]
 
   /// CHECK-START: void Main.InlineVoid() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
@@ -31,7 +31,7 @@
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (before)
   /// CHECK-DAG:     <<Param:i\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (after)
@@ -44,7 +44,7 @@
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (before)
   /// CHECK-DAG:     <<Param:j\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (after)
@@ -57,7 +57,7 @@
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (before)
   /// CHECK-DAG:     <<Param:l\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (after)
@@ -128,8 +128,8 @@
   /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
   /// CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
-  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>,{{[ij]\d+}}]
-  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>{{(,[ij]\d+)?}}]
+  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                     Return [<<Phi>>]
 
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index 59e7282..43bc9d0 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -659,6 +659,33 @@
 
 
   /**
+   * Exercise constant folding on constant (static) condition for null references.
+   */
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (before)
+  /// CHECK-DAG:     <<Null:l\d+>>    NullConstant
+  /// CHECK-DAG:     <<Cond:z\d+>>    NotEqual [<<Null>>,<<Null>>]
+  /// CHECK-DAG:                      If [<<Cond>>]
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-DAG:     <<Const0:i\d+>>  IntConstant 0
+  /// CHECK-DAG:                      If [<<Const0>>]
+
+  /// CHECK-START: int Main.StaticConditionNulls() constant_folding_after_inlining (after)
+  /// CHECK-NOT:                      NotEqual
+
+  private static Object getNull() {
+    return null;
+  }
+
+  public static int StaticConditionNulls() {
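+    // After inlining, both references are the null constant, so the comparison
+    // folds to true and the method always returns 5.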
+    Object a = getNull();
+    Object b = getNull();
+    return (a == b) ? 5 : 2;
+  }
+
+
+  /**
    * Exercise constant folding on a program with condition
    * (i.e. jumps) leading to the creation of many blocks.
    *
@@ -1208,6 +1235,7 @@
     assertLongEquals(9, XorLongInt());
 
     assertIntEquals(5, StaticCondition());
+    assertIntEquals(5, StaticConditionNulls());
 
     assertIntEquals(7, JumpsAndConditionals(true));
     assertIntEquals(3, JumpsAndConditionals(false));
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index ffeae7d..c3d2759 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -652,20 +652,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -685,27 +684,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -725,25 +722,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -770,18 +767,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -816,14 +814,18 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -831,8 +833,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Several HDeoptimize will be added. Two for each index.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -859,7 +861,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -874,23 +875,27 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
+    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -914,20 +919,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -955,26 +959,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop:
+    // two for the index and one for the null check on matrix.
+    // Same for the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -994,15 +1000,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  //  Simplification removes the redundant check
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Three HDeoptimize will be added. Two for the index
+    // and one for null check on array.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index f1885de..f1f80ca 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -14,11 +14,19 @@
  * limitations under the License.
  */
 
-interface Interface {
+interface SuperInterface {
+  void superInterfaceMethod();
+}
+
+interface OtherInterface extends SuperInterface {
+}
+
+interface Interface extends SuperInterface {
   void $noinline$f();
 }
 
 class Super implements Interface {
+  public void superInterfaceMethod() {}
   public void $noinline$f() {
     throw new RuntimeException();
   }
@@ -454,7 +462,7 @@
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SubclassC exact:false
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
-  /// CHECK-START: SubclassC Main.inlineGenerics() reference_type_propagation_after_inlining (after)
+  /// CHECK-START: SubclassC Main.inlineGenerics() inliner (after)
   /// CHECK:      <<BoundType:l\d+>> BoundType klass:SubclassC exact:false
   /// CHECK:                         Return [<<BoundType>>]
   private SubclassC inlineGenerics() {
@@ -466,7 +474,7 @@
   /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Final exact:true
   /// CHECK-NEXT:                    Return [<<Invoke>>]
 
-  /// CHECK-START: Final Main.inlineGenericsFinal() reference_type_propagation_after_inlining (after)
+  /// CHECK-START: Final Main.inlineGenericsFinal() inliner (after)
   /// CHECK:      <<BoundType:l\d+>> BoundType klass:Final exact:true
   /// CHECK:                         Return [<<BoundType>>]
   private Final inlineGenericsFinal() {
@@ -474,7 +482,7 @@
     return f;
   }
 
-  /// CHECK-START: void Main.boundOnlyOnceIfNotNull(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.boundOnlyOnceIfNotNull(java.lang.Object) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private void boundOnlyOnceIfNotNull(Object o) {
@@ -483,7 +491,7 @@
     }
   }
 
-  /// CHECK-START: void Main.boundOnlyOnceIfInstanceOf(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.boundOnlyOnceIfInstanceOf(java.lang.Object) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private void boundOnlyOnceIfInstanceOf(Object o) {
@@ -492,7 +500,7 @@
     }
   }
 
-  /// CHECK-START: Final Main.boundOnlyOnceCheckCast(Generic) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: Final Main.boundOnlyOnceCheckCast(Generic) inliner (after)
   /// CHECK:      BoundType
   /// CHECK-NOT:  BoundType
   private Final boundOnlyOnceCheckCast(Generic<Final> o) {
@@ -508,7 +516,7 @@
   /// CHECK:      <<Phi:l\d+>> Phi klass:Super
   /// CHECK:                   NullCheck [<<Phi>>] klass:Super
 
-  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: void Main.updateNodesInTheSameBlockAsPhi(boolean) inliner (after)
   /// CHECK:      <<Phi:l\d+>> Phi klass:SubclassA
   /// CHECK:                   NullCheck [<<Phi>>] klass:SubclassA
   private void updateNodesInTheSameBlockAsPhi(boolean cond) {
@@ -519,7 +527,7 @@
     s.$noinline$f();
   }
 
-  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) reference_type_propagation_after_inlining (after)
+  /// CHECK-START: java.lang.String Main.checkcastPreserveNullCheck(java.lang.Object) inliner (after)
   /// CHECK:      <<This:l\d+>>     ParameterValue
   /// CHECK:      <<Param:l\d+>>    ParameterValue
   /// CHECK:      <<Clazz:l\d+>>    LoadClass
@@ -548,6 +556,119 @@
   private void argumentCheck(Super s, double d, SubclassA a, Final f) {
   }
 
+  private Main getNull() {
+    return null;
+  }
+
+  private int mainField = 0;
+
+  /// CHECK-START: SuperInterface Main.getWiderType(boolean, Interface, OtherInterface) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private SuperInterface getWiderType(boolean cond, Interface a, OtherInterface b) {
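+    // Reference type propagation joins the two interface types to
+    // java.lang.Object, as asserted by the CHECK lines above.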
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (before)
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:SuperInterface
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+
+  /// CHECK-START: void Main.testInlinerWidensReturnType(boolean, Interface, OtherInterface) inliner (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:SuperInterface exact:false
+  /// CHECK:                         InvokeInterface [<<NullCheck>>]
+  private void testInlinerWidensReturnType(boolean cond, Interface a, OtherInterface b) {
+    getWiderType(cond, a, b).superInterfaceMethod();
+  }
+
+  /// CHECK-START: void Main.testInlinerReturnsNull() inliner (before)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Invoke:l\d+>>    InvokeStaticOrDirect klass:Main
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Invoke>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+
+  /// CHECK-START: void Main.testInlinerReturnsNull() inliner (after)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Null:l\d+>>      NullConstant klass:java.lang.Object
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Null>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+  private void testInlinerReturnsNull() {
+    Main o = getNull();
+    o.mainField = 0;
+  }
+
+  /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (before)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main exact:false
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:Main exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+
+  /// CHECK-START: void Main.testPhiHasOnlyNullInputs(boolean) inliner (after)
+  /// CHECK:      <<Int:i\d+>>       IntConstant 0
+  /// CHECK:      <<Null:l\d+>>      NullConstant klass:java.lang.Object
+  /// CHECK:      <<Phi:l\d+>>       Phi [<<Null>>,<<Null>>] klass:java.lang.Object exact:false
+  /// CHECK:      <<NullCheck:l\d+>> NullCheck [<<Phi>>] klass:java.lang.Object exact:false
+  /// CHECK:                         InstanceFieldSet [<<NullCheck>>,<<Int>>]
+  private void testPhiHasOnlyNullInputs(boolean cond) {
+    Main o = cond ? null : getNull();
+    o.mainField = 0;
+    // getSuper() will force a type propagation after inlining
+    // because it returns a more precise type.
+    getSuper();
+  }
+
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
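+    // The loop phi's first input is the null constant; its type must still be
+    // inferred as Main from the remaining inputs (see the CHECK lines above).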
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/455-set-vreg/info.txt b/test/455-set-vreg/info.txt
deleted file mode 100644
index e8c57b5..0000000
--- a/test/455-set-vreg/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Tests for setting DEX registers in a Java method.
diff --git a/test/455-set-vreg/set_vreg_jni.cc b/test/455-set-vreg/set_vreg_jni.cc
deleted file mode 100644
index 21149f6..0000000
--- a/test/455-set-vreg/set_vreg_jni.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "arch/context.h"
-#include "art_method-inl.h"
-#include "jni.h"
-#include "scoped_thread_state_change.h"
-#include "stack.h"
-#include "thread.h"
-
-namespace art {
-
-namespace {
-
-class TestVisitor : public StackVisitor {
- public:
-  TestVisitor(Thread* thread, Context* context, mirror::Object* this_value)
-      SHARED_REQUIRES(Locks::mutator_lock_)
-      : StackVisitor(thread, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
-        this_value_(this_value) {}
-
-  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
-    ArtMethod* m = GetMethod();
-    std::string m_name(m->GetName());
-
-    if (m_name.compare("testIntVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, 5, kIntVReg));
-      CHECK(SetVReg(m, 3, 4, kIntVReg));
-      CHECK(SetVReg(m, 4, 3, kIntVReg));
-      CHECK(SetVReg(m, 5, 2, kIntVReg));
-      CHECK(SetVReg(m, 6, 1, kIntVReg));
-    } else if (m_name.compare("testLongVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, std::numeric_limits<int64_t>::max(), kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 6, 4, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 8, 3, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 10, 2, kLongLoVReg, kLongHiVReg));
-      CHECK(SetVRegPair(m, 12, 1, kLongLoVReg, kLongHiVReg));
-    } else if (m_name.compare("testFloatVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVReg(m, 2, bit_cast<uint32_t, float>(5.0f), kFloatVReg));
-      CHECK(SetVReg(m, 3, bit_cast<uint32_t, float>(4.0f), kFloatVReg));
-      CHECK(SetVReg(m, 4, bit_cast<uint32_t, float>(3.0f), kFloatVReg));
-      CHECK(SetVReg(m, 5, bit_cast<uint32_t, float>(2.0f), kFloatVReg));
-      CHECK(SetVReg(m, 6, bit_cast<uint32_t, float>(1.0f), kFloatVReg));
-    } else if (m_name.compare("testDoubleVReg") == 0) {
-      uint32_t value = 0;
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
-      CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value_);
-
-      CHECK(SetVRegPair(m, 4, bit_cast<uint64_t, double>(5.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 6, bit_cast<uint64_t, double>(4.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 8, bit_cast<uint64_t, double>(3.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 10, bit_cast<uint64_t, double>(2.0), kDoubleLoVReg, kDoubleHiVReg));
-      CHECK(SetVRegPair(m, 12, bit_cast<uint64_t, double>(1.0), kDoubleLoVReg, kDoubleHiVReg));
-    }
-
-    return true;
-  }
-
-  mirror::Object* this_value_;
-};
-
-extern "C" JNIEXPORT void JNICALL Java_Main_doNativeCallSetVReg(JNIEnv*, jobject value) {
-  ScopedObjectAccess soa(Thread::Current());
-  std::unique_ptr<Context> context(Context::Create());
-  TestVisitor visitor(soa.Self(), context.get(), soa.Decode<mirror::Object*>(value));
-  visitor.WalkStack();
-}
-
-}  // namespace
-
-}  // namespace art
diff --git a/test/455-set-vreg/src/Main.java b/test/455-set-vreg/src/Main.java
deleted file mode 100644
index 4db9d66..0000000
--- a/test/455-set-vreg/src/Main.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-  public Main() {
-  }
-
-  int testIntVReg(int a, int b, int c, int d, int e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  long testLongVReg(long a, long b, long c, long d, long e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  float testFloatVReg(float a, float b, float c, float d, float e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  double testDoubleVReg(double a, double b, double c, double d, double e) {
-    doNativeCallSetVReg();
-    return a - b - c - d - e;
-  }
-
-  native void doNativeCallSetVReg();
-
-  public static void main(String[] args) {
-    System.loadLibrary(args[0]);
-    Main rm = new Main();
-    int intExpected = 5 - 4 - 3 - 2 - 1;
-    int intResult = rm.testIntVReg(0, 0, 0, 0, 0);
-    if (intResult != intExpected) {
-      throw new Error("Expected " + intExpected + ", got " + intResult);
-    }
-
-    long longExpected = Long.MAX_VALUE - 4 - 3 - 2 - 1;
-    long longResult = rm.testLongVReg(0, 0, 0, 0, 0);
-    if (longResult != longExpected) {
-      throw new Error("Expected " + longExpected + ", got " + longResult);
-    }
-
-    float floatExpected = 5.0f - 4.0f - 3.0f - 2.0f - 1.0f;
-    float floatResult = rm.testFloatVReg(0.0f, 0.0f, 0.0f, 0.0f, 0.0f);
-    if (floatResult != floatExpected) {
-      throw new Error("Expected " + floatExpected + ", got " + floatResult);
-    }
-
-    double doubleExpected = 5.0 - 4.0 - 3.0 - 2.0 - 1.0;
-    double doubleResult = rm.testDoubleVReg(0.0, 0.0, 0.0, 0.0, 0.0);
-    if (doubleResult != doubleExpected) {
-      throw new Error("Expected " + doubleExpected + ", got " + doubleResult);
-    }
-  }
-}
diff --git a/test/457-regs/regs_jni.cc b/test/457-regs/regs_jni.cc
index 64b2336..79fa8b0 100644
--- a/test/457-regs/regs_jni.cc
+++ b/test/457-regs/regs_jni.cc
@@ -68,7 +68,7 @@
         CHECK(!success);
       }
 
-      CHECK(GetVReg(m, 3, kReferenceVReg, &value));
+      CHECK(GetVReg(m, 3, kIntVReg, &value));
       CHECK_EQ(value, 1u);
 
       CHECK(GetVReg(m, 4, kFloatVReg, &value));
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index c32d34a..6151fc1 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -389,24 +389,6 @@
     return arg << 0;
   }
 
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
-  /// CHECK-DAG:     <<Shl:i\d+>>      Shl [<<Arg>>,<<Const1>>]
-  /// CHECK-DAG:                       Return [<<Shl>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-DAG:     <<Arg:i\d+>>      ParameterValue
-  /// CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg>>,<<Arg>>]
-  /// CHECK-DAG:                       Return [<<Add>>]
-
-  /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after)
-  /// CHECK-NOT:                       Shl
-
-  public static int Shl1(int arg) {
-    return arg << 1;
-  }
-
   /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before)
   /// CHECK-DAG:     <<Arg:j\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
@@ -1226,6 +1208,130 @@
     return arg / -0.25f;
   }
 
+  /**
+   * Test strength reduction of factors of the form (2^n + 1).
+   */
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const9:i\d+>>      IntConstant 9
+  /// CHECK:                            Mul [<<Arg>>,<<Const9>>]
+
+  /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:i\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const3:i\d+>>      IntConstant 3
+  /// CHECK:       <<Shift:i\d+>>       Shl [<<Arg>>,<<Const3>>]
+  /// CHECK-NEXT:                       Add [<<Arg>>,<<Shift>>]
+
+  public static int mulPow2Plus1(int arg) {
+    return arg * 9;
+  }
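+  // As the checker lines above show, the simplifier rewrites arg * 9 as
+  // (arg << 3) + arg; e.g. 5 * 9 == (5 << 3) + 5 == 45.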
+
+  /**
+   * Test strength reduction of factors of the form (2^n - 1).
+   */
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const31:j\d+>>     LongConstant 31
+  /// CHECK:                            Mul [<<Arg>>,<<Const31>>]
+
+  /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after)
+  /// CHECK-DAG:   <<Arg:j\d+>>         ParameterValue
+  /// CHECK-DAG:   <<Const5:i\d+>>      IntConstant 5
+  /// CHECK:       <<Shift:j\d+>>       Shl [<<Arg>>,<<Const5>>]
+  /// CHECK-NEXT:                       Sub [<<Shift>>,<<Arg>>]
+
+  public static long mulPow2Minus1(long arg) {
+    return arg * 31;
+  }
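+  // Here arg * 31 becomes (arg << 5) - arg, per the Shl/Sub pattern above;
+  // e.g. 3 * 31 == (3 << 5) - 3 == 93.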
+
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<Field>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldNotEqualOne() {
+    return (booleanField == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<Field>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after)
+  /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
+  /// CHECK-DAG:      <<Not:z\d+>>      BooleanNot [<<Field>>]
+  /// CHECK-DAG:                        If [<<Not>>]
+
+  public static int booleanFieldEqualZero() {
+    return (booleanField != false) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<NE:z\d+>>       NotEqual [<<GT>>,<<Const1>>]
+  /// CHECK-DAG:                        If [<<NE>>]
+
+  /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionNotEqualOne(int i) {
+    return ((i > 42) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:      <<GT:z\d+>>       GreaterThan [<<Arg>>,<<Const42>>]
+  /// CHECK-DAG:      <<EQ:z\d+>>       Equal [<<GT>>,<<Const0>>]
+  /// CHECK-DAG:                        If [<<EQ>>]
+
+  /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after)
+  /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
+  /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
+  /// CHECK-DAG:                        If [<<LE:z\d+>>]
+  /// CHECK-DAG:      <<LE>>            LessThanOrEqual [<<Arg>>,<<Const42>>]
+  // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions.
+
+  public static int intConditionEqualZero(int i) {
+    return ((i > 42) != false) ? 13 : 54;
+  }
+
+  // Test that conditions on float/double are not flipped.
+
+  /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before)
+  /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
+  /// CHECK-DAG:                        NotEqual [{{i\d+}},<<Const1>>]
+
+  public static int floatConditionNotEqualOne(float f) {
+    return ((f > 42.0f) == true) ? 13 : 54;
+  }
+
+  /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before)
+  /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
+  /// CHECK-DAG:                        Equal [{{i\d+}},<<Const0>>]
+
+  public static int doubleConditionEqualZero(double d) {
+    return ((d > 42.0) != false) ? 13 : 54;
+  }
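+  // The float/double conditions above must keep the explicit comparison with
+  // the constant: flipping, say, (f > 42.0f) into (f <= 42.0f) is not
+  // value-preserving, because both comparisons are false when f is NaN.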
+
   public static void main(String[] args) {
     int arg = 123456;
 
@@ -1274,7 +1380,6 @@
     assertDoubleEquals(Div2(150.0), 75.0);
     assertFloatEquals(DivMP25(100.0f), -400.0f);
     assertDoubleEquals(DivMP25(150.0), -600.0);
-    assertLongEquals(Shl1(100), 200);
     assertIntEquals(UShr28And15(0xc1234567), 0xc);
     assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL);
     assertIntEquals(UShr28And7(0xc1234567), 0x4);
@@ -1283,5 +1388,32 @@
     assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L);
     assertIntEquals(Shr24And127(0xc1234567), 0x41);
     assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L);
+    assertIntEquals(0, mulPow2Plus1(0));
+    assertIntEquals(9, mulPow2Plus1(1));
+    assertIntEquals(18, mulPow2Plus1(2));
+    assertIntEquals(900, mulPow2Plus1(100));
+    assertIntEquals(111105, mulPow2Plus1(12345));
+    assertLongEquals(0, mulPow2Minus1(0));
+    assertLongEquals(31, mulPow2Minus1(1));
+    assertLongEquals(62, mulPow2Minus1(2));
+    assertLongEquals(3100, mulPow2Minus1(100));
+    assertLongEquals(382695, mulPow2Minus1(12345));
+
+    booleanField = false;
+    assertIntEquals(booleanFieldNotEqualOne(), 54);
+    assertIntEquals(booleanFieldEqualZero(), 54);
+    booleanField = true;
+    assertIntEquals(booleanFieldNotEqualOne(), 13);
+    assertIntEquals(booleanFieldEqualZero(), 13);
+    assertIntEquals(intConditionNotEqualOne(6), 54);
+    assertIntEquals(intConditionNotEqualOne(43), 13);
+    assertIntEquals(intConditionEqualZero(6), 54);
+    assertIntEquals(intConditionEqualZero(43), 13);
+    assertIntEquals(floatConditionNotEqualOne(6.0f), 54);
+    assertIntEquals(floatConditionNotEqualOne(43.0f), 13);
+    assertIntEquals(doubleConditionEqualZero(6.0), 54);
+    assertIntEquals(doubleConditionEqualZero(43.0), 13);
   }
+
+  public static boolean booleanField;
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..5080f142 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -19,23 +19,25 @@
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeVirtual
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt
index 387e1a7..7de097f 100644
--- a/test/478-checker-clinit-check-pruning/expected.txt
+++ b/test/478-checker-clinit-check-pruning/expected.txt
@@ -4,3 +4,9 @@
 Main$ClassWithClinit4's static initializer
 Main$ClassWithClinit5's static initializer
 Main$ClassWithClinit6's static initializer
+Main$ClassWithClinit7's static initializer
+Main$ClassWithClinit8's static initializer
+Main$ClassWithClinit9's static initializer
+Main$ClassWithClinit10's static initializer
+Main$ClassWithClinit11's static initializer
+Main$ClassWithClinit12's static initializer
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index e6aab63..7993513 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -67,14 +67,14 @@
    */
 
   /// CHECK-START: void Main.invokeStaticNotInlined() builder (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
 
   /// CHECK-START: void Main.invokeStaticNotInlined() inliner (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{([ij]\d+,)?}}<<ClinitCheck>>]
 
   // The following checks ensure the clinit check and load class
   // instructions added by the builder are pruned by the
@@ -83,7 +83,7 @@
   // before the next pass (liveness analysis) instead.
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
   /// CHECK-NOT:                           LoadClass
@@ -269,7 +269,7 @@
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:implicit
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
 
   /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before)
@@ -289,7 +289,7 @@
   /// CHECK-DAG:     <<IntConstant:i\d+>>  IntConstant 0
   /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:true
   /// CHECK-DAG:                           StaticFieldSet [<<LoadClass>>,<<IntConstant>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK-DAG:                           InvokeStaticOrDirect clinit_check:none
 
   /// CHECK-START: void Main.clinitBecauseOfFieldAccess() liveness (before)
   /// CHECK-NOT:                           ClinitCheck
@@ -298,6 +298,206 @@
     ClassWithClinit2.$noinline$staticMethod();
   }
 
+  /*
+   * Verify that LoadClass from const-class is not merged with
+   * later invoke-static (or its ClinitCheck).
+   */
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:implicit
+
+  /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void constClassAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreClass(ClassWithClinit7.class);
+    ClassWithClinit7.someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreClass(Class c) {
+  }
+
+  static class ClassWithClinit7 {
+    static {
+      System.out.println("Main$ClassWithClinit7's static initializer");
+    }
+
+    // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from sget is not merged with later invoke-static.
+   */
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void sgetAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreInt(ClassWithClinit8.value);
+    ClassWithClinit8.someStaticMethod(it);
+  }
+
+  static void $opt$inline$ignoreInt(int i) {
+  }
+
+  static class ClassWithClinit8 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit8's static initializer");
+    }
+
+    // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from const-class, ClinitCheck from sget and
+   * InvokeStaticOrDirect from invoke-static are not merged.
+   */
+
+  /// CHECK-START: void Main.constClassSgetAndInvokeStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:false
+  /// CHECK:                               ClinitCheck
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  static void constClassSgetAndInvokeStatic(Iterable it) {
+    $opt$inline$ignoreClass(ClassWithClinit9.class);
+    $opt$inline$ignoreInt(ClassWithClinit9.value);
+    ClassWithClinit9.someStaticMethod(it);
+  }
+
+  static class ClassWithClinit9 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit9's static initializer");
+    }
+
+    // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main().
+    static void someStaticMethod(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * Verify that LoadClass from a fully-inlined invoke-static is not merged
+   * with InvokeStaticOrDirect from a later invoke-static to the same method.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaNonStatic(Iterable it) {
+    inlinedInvokeStaticViaNonStaticHelper(null);
+    inlinedInvokeStaticViaNonStaticHelper(it);
+  }
+
+  static void inlinedInvokeStaticViaNonStaticHelper(Iterable it) {
+    ClassWithClinit10.inlinedForNull(it);
+  }
+
+  static class ClassWithClinit10 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit10's static initializer");
+    }
+
+    static void inlinedForNull(Iterable it) {
+      if (it != null) {
+        // We're not inlining invoke-interface at the moment.
+        it.iterator();
+      }
+    }
+  }
+
+  /*
+   * Check that the LoadClass from an invoke-static C.foo() doesn't get merged with
+   * an invoke-static inside C.foo(). This would mess up the stack walk in the
+   * resolution trampoline where we would have to load C (if C isn't loaded yet)
+   * which is not permitted there.
+   *
+   * Note: In case of failure, we would get a failed assertion during compilation,
+   * so we wouldn't really get to the checker tests below.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStatic(Iterable it) {
+    ClassWithClinit11.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit11 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit11's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable it) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /*
+   * A test similar to inlinedInvokeStaticViaStatic() but doing the indirect invoke
+   * twice, with the first call fully inlined.
+   */
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK:                               LoadClass gen_clinit_check:true
+  /// CHECK:                               InvokeStaticOrDirect clinit_check:none
+
+  /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before)
+  /// CHECK-NOT:                           ClinitCheck
+
+  static void inlinedInvokeStaticViaStaticTwice(Iterable it) {
+    ClassWithClinit12.callInlinedForNull(null);
+    ClassWithClinit12.callInlinedForNull(it);
+  }
+
+  static class ClassWithClinit12 {
+    public static int value = 0;
+    static {
+      System.out.println("Main$ClassWithClinit12's static initializer");
+    }
+
+    static void callInlinedForNull(Iterable it) {
+      inlinedForNull(it);
+    }
+
+    static void inlinedForNull(Iterable it) {
+      if (it != null) {
+        // We're not inlining invoke-interface at the moment.
+        it.iterator();
+      }
+    }
+  }
+
   // TODO: Add a test for the case of a static method whose declaring
   // class type index is not available (i.e. when `storage_index`
   // equals `DexFile::kDexNoIndex` in
@@ -310,5 +510,12 @@
     ClassWithClinit4.invokeStaticNotInlined();
     SubClassOfClassWithClinit5.invokeStaticInlined();
     SubClassOfClassWithClinit6.invokeStaticNotInlined();
+    Iterable it = new Iterable() { public java.util.Iterator iterator() { return null; } };
+    constClassAndInvokeStatic(it);
+    sgetAndInvokeStatic(it);
+    constClassSgetAndInvokeStatic(it);
+    inlinedInvokeStaticViaNonStatic(it);
+    inlinedInvokeStaticViaStatic(it);
+    inlinedInvokeStaticViaStaticTwice(it);
   }
 }
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index ab4afdb..1de0bae 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -136,11 +136,11 @@
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
 ## CHECK-DAG:     <<Cst5:i\d+>>  IntConstant 5
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:<<HeaderY>>
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:<<HeaderY>>
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:<<HeaderY>>
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<Cst1>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add5>>       Add [<<PhiX2>>,<<Cst5>>]                   loop:<<HeaderY>>
@@ -152,12 +152,12 @@
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
-## CHECK-DAG:     <<Cst9:i\d+>>  IntConstant 9
+## CHECK-DAG:     <<Cst11:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>]               loop:<<HeaderY:B\d+>>
 ## CHECK-DAG:                    If [<<ArgY>>]                              loop:<<HeaderY>>
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX1>>,<<Cst7>>]                   loop:<<HeaderY>>
 ## CHECK-DAG:                    If [<<ArgZ>>]                              loop:none
-## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst9>>]                   loop:none
+## CHECK-DAG:     <<Mul9:i\d+>>  Mul [<<PhiX1>>,<<Cst11>>]                  loop:none
 ## CHECK-DAG:     <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>]                   loop:none
 ## CHECK-DAG:                    Return [<<PhiX2>>]                         loop:none
 
@@ -177,7 +177,7 @@
 
   # Additional logic which will end up outside the loop
   if-eqz p2, :skip_if
-  mul-int/lit8 p0, p0, 9
+  mul-int/lit8 p0, p0, 11
   :skip_if
 
   if-nez v0, :loop_end    # will always take the branch
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a8b6307 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
   /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 58c92f1..34d2f64 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -29,6 +29,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linear(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -41,6 +42,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDown(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -53,6 +55,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearObscure(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -66,6 +69,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearVeryObscure(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -79,6 +83,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWhile(int[] x) {
     int i = 0;
     int result = 0;
@@ -92,6 +97,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearThreeWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -108,6 +114,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearFourWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -128,6 +135,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinear(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -143,6 +151,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -162,6 +171,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearWithParameter(int n) {
     int[] x = new int[n];
     for (int i = 0; i < n; i++) {
@@ -174,6 +184,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearCopy(int x[]) {
     int n = x.length;
     int y[] = new int[n];
@@ -183,10 +194,55 @@
     return y;
   }
 
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
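+  // BCE succeeds above because i < x.length / 2 implies (i << 1) + 1 < x.length,
+  // so both subscripts are provably in range without any runtime test.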
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i += 2) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithCompoundStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
     int result = 0;
@@ -202,6 +258,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -218,6 +275,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -234,6 +292,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -250,6 +309,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -266,6 +326,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -279,6 +340,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -292,6 +354,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -306,6 +369,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -320,6 +384,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -330,10 +395,31 @@
     return result;
   }
 
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets the upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
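+  // The bound hi is loop-invariant and equal to x.length on entry to the main
+  // loop, so BCE can remove the bounds check even though hi is computed in the
+  // pre-loop rather than taken directly from the array.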
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicIdiom(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -350,6 +436,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence2(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -372,6 +459,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence4(int tc) {
     int[] x = { 1, 3, 5, 7 };
     // Loop with periodic sequence (0, 1, 2, 3).
@@ -395,6 +483,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -408,6 +497,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -421,6 +511,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -434,6 +525,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -448,6 +540,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -461,6 +554,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -474,6 +568,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -487,6 +582,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -501,6 +597,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
@@ -511,6 +608,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
@@ -521,6 +619,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
@@ -533,6 +632,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
@@ -541,6 +641,306 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
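+  // A note on the three Deoptimize instructions matched above: dynamic BCE is
+  // assumed to hoist the null check and the two range tests (lo >= 0 and
+  // hi <= x.length) into deoptimization guards before the loop, which is what
+  // lets the per-iteration NullCheck/ArrayLength/BoundsCheck disappear.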
+
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
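+  // w follows the wrap-around sequence 9, 0, 1, ..., 8, keeping every
+  // subscript in [0, 9], so deopt guards outside the loop can cover all
+  // accesses and the per-iteration checks can be removed.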
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = max int. Since i is also used
+    // as subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the index is not used as subscript,
+    // and dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: ArrayGet
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test the array length of a before the loop so that only bounds
+    // checks on constant subscripts remain, making them viable candidates for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them candidates for deoptimization based on constant indices.
+        // The compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[0][i] = 1;
+        a[1][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
+                                                  boolean[] r,
+                                                  byte[] s,
+                                                  char[] t,
+                                                  short[] u,
+                                                  int[] v,
+                                                  long[] w,
+                                                  float[] x,
+                                                  double[] y,
+                                                  Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+    }
+    return result;
+  }
+
   //
   // Verifier.
   //
@@ -596,6 +996,9 @@
     }
 
     // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
     expectEquals(66, linearWithVeryLargePositiveStride());
@@ -608,6 +1011,7 @@
     expectEquals(55, linearDoWhileUp());
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
+    expectEquals(55, invariantFromPreLoop(x, 1));
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
@@ -690,6 +1094,86 @@
       sResult += 1000;
     }
     expectEquals(1055, sResult);
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
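+    // x holds 1 .. 10 here, so the loop adds 55 before x[10] throws; the
+    // handler then adds 1000, giving 1055.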
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
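+    // With offset 1 the loop adds x[1] + ... + x[9] = 54 before x[10] throws,
+    // and the handler adds 1000, giving 1054.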
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[2][10];
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(a[0][1], 1);
+    expectEquals(a[1][1], 2);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    Integer[] x9 = { 9 };
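+    // Expected 505 = sum of x (55) + 10 iterations * (1 + 2 + ... + 9 = 45).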
+    expectEquals(505,
+        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index c766aaa..17e88ce 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -22,7 +22,7 @@
     return radius * radius * Math.PI;
   }
   private double radius;
-};
+}
 
 class TestClass {
   TestClass() {
@@ -35,17 +35,31 @@
   int j;
   volatile int k;
   TestClass next;
+  String str;
   static int si;
-};
+}
 
 class SubTestClass extends TestClass {
   int k;
-};
+}
 
 class TestClass2 {
   int i;
   int j;
-};
+}
+
+class Finalizable {
+  static boolean sVisited = false;
+  static final int VALUE = 0xbeef;
+  int i;
+
+  protected void finalize() {
+    if (i != VALUE) {
+      System.out.println("Where is the beef?");
+    }
+    sVisited = true;
+  }
+}
 
 public class Main {
 
@@ -56,7 +70,7 @@
 
   /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
   static double calcCircleArea(double radius) {
@@ -117,11 +131,14 @@
   /// CHECK: InstanceFieldGet
   /// CHECK: InstanceFieldSet
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK-NOT: InstanceFieldGet
 
   // A new allocation shouldn't alias with pre-existing values.
   static int test3(TestClass obj) {
+    // Do an allocation here to avoid the HLoadClass and HClinitCheck
+    // at the second allocation.
+    new TestClass();
     obj.i = 1;
     obj.next.j = 2;
     TestClass obj2 = new TestClass();
@@ -223,7 +240,7 @@
 
   /// CHECK-START: int Main.test8() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
   /// CHECK: InvokeVirtual
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: InstanceFieldGet
@@ -381,8 +398,8 @@
 
   /// CHECK-START: int Main.test16() load_store_elimination (after)
   /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
 
   // Test inlined constructor.
   static int test16() {
@@ -398,8 +415,8 @@
   /// CHECK-START: int Main.test17() load_store_elimination (after)
   /// CHECK: <<Const0:i\d+>> IntConstant 0
   /// CHECK: NewInstance
-  /// CHECK-NOT: StaticFieldSet
-  /// CHECK-NOT: StaticFieldGet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
   /// CHECK: Return [<<Const0>>]
 
   // Test getting default value.
@@ -455,6 +472,148 @@
     return obj;
   }
 
+  /// CHECK-START: void Main.test21() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK: StaticFieldGet
+
+  /// CHECK-START: void Main.test21() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: StaticFieldSet
+  /// CHECK: InstanceFieldGet
+
+  // Loop side effects can kill heap values; stores need to be kept in that case.
+  static void test21() {
+    TestClass obj = new TestClass();
+    obj.str = "abc";
+    for (int i = 0; i < 2; i++) {
+      // Generate a loop side effect that performs a write.
+      obj.si = 1;
+    }
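+    // substring(0, 0) is the empty string, so this keeps an observable use of
+    // obj.str (the InstanceFieldGet expected above) without changing the output.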
+    System.out.print(obj.str.substring(0, 0));
+  }
+
+  /// CHECK-START: int Main.test22() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: int Main.test22() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Loop side effects only affect stores into singletons that dominate the loop header.
+  static int test22() {
+    int sum = 0;
+    TestClass obj1 = new TestClass();
+    obj1.i = 2;       // This store can't be eliminated since it can be killed by loop side effects.
+    for (int i = 0; i < 2; i++) {
+      TestClass obj2 = new TestClass();
+      obj2.i = 3;    // This store can be eliminated since the singleton is inside the loop.
+      sum += obj2.i;
+    }
+    TestClass obj3 = new TestClass();
+    obj3.i = 5;      // This store can be eliminated since the singleton is created after the loop.
+    sum += obj1.i + obj3.i;
+    return sum;
+  }
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: int Main.test23(boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldGet
+  /// CHECK: Return
+  /// CHECK-NOT: InstanceFieldGet
+  /// CHECK: InstanceFieldSet
+
+  // Test store elimination on merging.
+  static int test23(boolean b) {
+    TestClass obj = new TestClass();
+    obj.i = 3;      // This store can be eliminated since the value flows into each branch.
+    if (b) {
+      obj.i += 1;   // This store cannot be eliminated due to the merge later.
+    } else {
+      obj.i += 2;   // This store cannot be eliminated due to the merge later.
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: void Main.testFinalizable() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+
+  // Allocations and stores into finalizable objects cannot be eliminated.
+  static void testFinalizable() {
+    Finalizable finalizable = new Finalizable();
+    finalizable.i = Finalizable.VALUE;
+  }
+
+  static java.lang.ref.WeakReference<Object> getWeakReference() {
+    return new java.lang.ref.WeakReference<>(new Object());
+  }
+
+  static void testFinalizableByForcingGc() {
+    testFinalizable();
+    java.lang.ref.WeakReference<Object> reference = getWeakReference();
+
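+    // Once the weak reference is cleared, its referent has been collected; we
+    // use that as a signal that the Finalizable instance allocated above should
+    // have been finalized by now as well.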
+    Runtime runtime = Runtime.getRuntime();
+    for (int i = 0; i < 20; ++i) {
+      runtime.gc();
+      System.runFinalization();
+      try {
+        Thread.sleep(1);
+      } catch (InterruptedException e) {
+        throw new AssertionError(e);
+      }
+
+      // Check to see if the weak reference has been garbage collected.
+      if (reference.get() == null) {
+        // A little bit more sleep time to make sure.
+        try {
+          Thread.sleep(100);
+        } catch (InterruptedException e) {
+          throw new AssertionError(e);
+        }
+        if (!Finalizable.sVisited) {
+          System.out.println("finalize() not called.");
+        }
+        return;
+      }
+    }
+    System.out.println("testFinalizableByForcingGc() failed to force gc.");
+  }
+
   public static void assertIntEquals(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -508,5 +667,10 @@
     float[] fa2 = { 1.8f };
     assertFloatEquals(test19(fa1, fa2), 1.8f);
     assertFloatEquals(test20().i, 0);
+    test21();
+    assertIntEquals(test22(), 13);
+    assertIntEquals(test23(true), 4);
+    assertIntEquals(test23(false), 5);
+    testFinalizableByForcingGc();
   }
 }
diff --git a/test/530-checker-regression-reftype-final/smali/TestCase.smali b/test/530-checker-regression-reftype-final/smali/TestCase.smali
index 8fd7bb7..44facfc 100644
--- a/test/530-checker-regression-reftype-final/smali/TestCase.smali
+++ b/test/530-checker-regression-reftype-final/smali/TestCase.smali
@@ -23,7 +23,7 @@
 # inline any methods from array classes, this bug cannot be triggered and we
 # verify it using Checker.
 
-## CHECK-START: void TestCase.testInliner() reference_type_propagation_after_inlining (before)
+## CHECK-START: void TestCase.testInliner() inliner (after)
 ## CHECK-DAG:             CheckCast [<<Phi:l\d+>>,{{l\d+}}]
 ## CHECK-DAG:    <<Phi>>  Phi klass:java.lang.Object[] exact:false
 
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..3f65d5a 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -35,7 +35,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
@@ -56,7 +56,7 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      InvokeVirtual
   /// CHECK:      test
   /// CHECK:      jz/eq
   // Check that we don't try to compare the classes.
diff --git a/test/537-checker-jump-over-jump/expected.txt b/test/537-checker-jump-over-jump/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-jump-over-jump/expected.txt
diff --git a/test/537-checker-jump-over-jump/info.txt b/test/537-checker-jump-over-jump/info.txt
new file mode 100644
index 0000000..aeb30bb
--- /dev/null
+++ b/test/537-checker-jump-over-jump/info.txt
@@ -0,0 +1 @@
+Test for X86-64 elimination of jump over jump.
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
new file mode 100644
index 0000000..cf9a69d
--- /dev/null
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static int FIBCOUNT = 64;
+  public static int[] fibs;
+
+  /// CHECK-START-X86_64: int Main.test() disassembly (after)
+  /// CHECK-DAG:   <<Zero:i\d+>>        IntConstant 0
+  //
+  /// CHECK:                            If
+  /// CHECK-NEXT:                       cmp
+  /// CHECK-NEXT:                       jnl/ge
+  //
+  /// CHECK-DAG:   <<Fibs:l\d+>>        StaticFieldGet
+  /// CHECK-DAG:                        NullCheck [<<Fibs>>]
+  /// CHECK-NOT:                        jmp
+  /// CHECK-DAG:   <<FibsAtZero:i\d+>>  ArrayGet [<<Fibs>>,<<Zero>>]
+  /// CHECK-DAG:                        Return [<<FibsAtZero>>]
+  //
+  // Checks that there is no conditional jump over a `jmp`
+  // instruction. The `ArrayGet` instruction is in the next block.
+  //
+  // Note that the `StaticFieldGet` HIR instruction above (captured as
+  // `Fibs`) can produce a `jmp` x86-64 instruction when read barriers
+  // are enabled (to jump into the read barrier slow path), which is
+  // different from the `jmp` in the `CHECK-NOT` assertion.
+  public static int test() {
+    for (int i = 1; ; i++) {
+      if (i >= FIBCOUNT) {
+        return fibs[0];
+      }
+      fibs[i] = (i + fibs[(i - 1)]);
+    }
+  }
+
+  public static void main(String[] args) {
+    fibs = new int[FIBCOUNT];
+    fibs[0] = 1;
+    test();
+  }
+}
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 979c4c8..f791adf 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -260,6 +260,218 @@
     return arg ^ 0xf00000000000000fL;
   }
 
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK:                lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-ARM: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK:                add
+  /// CHECK:                adc
+
+  /// CHECK-START-X86: long Main.shl1(long) disassembly (after)
+  /// CHECK-NOT:            shl
+
+  public static long shl1(long arg) {
+    return arg << 1;
+  }
+
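+  // For constant shift distances with 1 < distance < 32, the ARM code generator
+  // splits the long shift across the register pair: shift the high word, OR in
+  // the bits that cross over from the low word, then shift the low word. The
+  // CHECK lines below encode this three-instruction pattern; the right shifts
+  // further down use the mirrored lsr/asr sequence.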
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2
+
+  /// CHECK-START-ARM: long Main.shl2(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl2(long arg) {
+    return arg << 2;
+  }
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK:                lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1
+  /// CHECK:                lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31
+
+  /// CHECK-START-ARM: long Main.shl31(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl31(long arg) {
+    return arg << 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl32(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}}
+
+  public static long shl32(long arg) {
+    return arg << 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl33(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl33(long arg) {
+    return arg << 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-DAG:            lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.shl63(long) disassembly (after)
+  /// CHECK-NOT:            lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shl63(long arg) {
+    return arg << 63;
+  }
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK:                asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
+
+  /// CHECK-START-ARM: long Main.shr1(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr1(long arg) {
+    return arg >> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.shr2(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr2(long arg) {
+    return arg >> 2;
+  }
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr31(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr31(long arg) {
+    return arg >> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            mov {{r\d+}}, <<high>>
+
+  /// CHECK-START-ARM: long Main.shr32(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            lsr{{s?|\.w}}
+
+  public static long shr32(long arg) {
+    return arg >> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr33(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr33(long arg) {
+    return arg >> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31
+  /// CHECK-DAG:            asr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.shr63(long) disassembly (after)
+  /// CHECK-NOT:            asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long shr63(long arg) {
+    return arg >> 63;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK:                lsrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK:                mov.w {{r\d+}}, {{r\d+}}, rrx
+
+  /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr1(long arg) {
+    return arg >>> 1;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2
+
+  /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr2(long arg) {
+    return arg >>> 2;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK:                lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31
+  /// CHECK:                orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1
+  /// CHECK:                lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31
+
+  /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr31(long arg) {
+    return arg >>> 31;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-DAG:            mov {{r\d+}}, {{r\d+}}
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}}
+
+  public static long ushr32(long arg) {
+    return arg >>> 32;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr33(long arg) {
+    return arg >>> 33;
+  }
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-DAG:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31
+  /// CHECK-DAG:            mov{{s?|\.w}} {{r\d+}}, #0
+
+  /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after)
+  /// CHECK-NOT:            lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static long ushr63(long arg) {
+    return arg >>> 63;
+  }
+
   /**
    * Test that the `-1` constant is not synthesized in a register and that we
    * instead simply switch between `add` and `sub` instructions with the
@@ -311,5 +523,44 @@
     assertLongEquals(xor0xf00000000000000f(longArg), 0xe23456788765432eL);
 
     assertLongEquals(14, addM1(7));
+
+    assertLongEquals(shl1(longArg), 0x2468acf10eca8642L);
+    assertLongEquals(shl2(longArg), 0x48d159e21d950c84L);
+    assertLongEquals(shl31(longArg), 0x43b2a19080000000L);
+    assertLongEquals(shl32(longArg), 0x8765432100000000L);
+    assertLongEquals(shl33(longArg), 0x0eca864200000000L);
+    assertLongEquals(shl63(longArg), 0x8000000000000000L);
+    assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL);
+    assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L);
+    assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L);
+    assertLongEquals(shl32(~longArg), 0x789abcde00000000L);
+    assertLongEquals(shl33(~longArg), 0xf13579bc00000000L);
+    assertLongEquals(shl63(~longArg), 0x0000000000000000L);
+
+    assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(shr2(longArg), 0x048d159e21d950c8L);
+    assertLongEquals(shr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(shr32(longArg), 0x0000000012345678L);
+    assertLongEquals(shr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(shr63(longArg), 0x0000000000000000L);
+    assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL);
+    assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L);
+    assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL);
+    assertLongEquals(shr32(~longArg), 0xffffffffedcba987L);
+    assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L);
+    assertLongEquals(shr63(~longArg), 0xffffffffffffffffL);
+
+    assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L);
+    assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L);
+    assertLongEquals(ushr31(longArg), 0x000000002468acf1L);
+    assertLongEquals(ushr32(longArg), 0x0000000012345678L);
+    assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL);
+    assertLongEquals(ushr63(longArg), 0x0000000000000000L);
+    assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL);
+    assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L);
+    assertLongEquals(ushr31(~longArg), 0x00000001db97530eL);
+    assertLongEquals(ushr32(~longArg), 0x00000000edcba987L);
+    assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L);
+    assertLongEquals(ushr63(~longArg), 0x0000000000000001L);
   }
 }
diff --git a/test/543-checker-dce-trycatch/expected.txt b/test/543-checker-dce-trycatch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/543-checker-dce-trycatch/expected.txt
diff --git a/test/543-checker-dce-trycatch/info.txt b/test/543-checker-dce-trycatch/info.txt
new file mode 100644
index 0000000..e541938
--- /dev/null
+++ b/test/543-checker-dce-trycatch/info.txt
@@ -0,0 +1 @@
+Tests removal of try/catch blocks by DCE.
\ No newline at end of file
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
new file mode 100644
index 0000000..44e907d
--- /dev/null
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -0,0 +1,317 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method private static $inline$False()Z
+    .registers 1
+    const/4 v0, 0x0
+    return v0
+.end method
+
+# Test a case where one entering TryBoundary is dead but the rest of the try
+# block remains live.
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK: Add
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (before)
+## CHECK:     TryBoundary kind:entry
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-NOT: Add
+
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination_final (after)
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+.method public static testDeadEntry(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    add-int/2addr p0, p1
+
+    :try_start
+    div-int/2addr p0, p2
+
+    :else
+    div-int/2addr p0, p3
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test a case where one exiting TryBoundary is dead but the rest of the try
+# block remains live.
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK: Add
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (before)
+## CHECK:     TryBoundary kind:exit
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-NOT: Add
+
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination_final (after)
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+.method public static testDeadExit(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    :try_start
+    div-int/2addr p0, p2
+
+    if-nez v0, :else
+
+    div-int/2addr p0, p3
+    goto :return
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :else
+    add-int/2addr p0, p1
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that a catch block remains live and consistent if some of the try blocks
+# throwing into it are removed.
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK:     TryBoundary kind:entry
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (before)
+## CHECK:     TryBoundary kind:exit
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK:     TryBoundary kind:entry
+## CHECK-NOT: TryBoundary kind:entry
+
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination_final (after)
+## CHECK:     TryBoundary kind:exit
+## CHECK-NOT: TryBoundary kind:exit
+
+.method public static testOneTryBlockDead(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    :try_start_1
+    div-int/2addr p0, p2
+    :try_end_1
+    .catchall {:try_start_1 .. :try_end_1} :catch_all
+
+    if-eqz v0, :return
+
+    :try_start_2
+    div-int/2addr p0, p3
+    :try_end_2
+    .catchall {:try_start_2 .. :try_end_2} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that try block membership is recomputed. In this test case, the try entry
+# stored with the merge block gets deleted and SSAChecker would fail if it were
+# not replaced with the try entry from the live branch.
+
+.method public static testRecomputeTryMembership(IIII)I
+    .registers 5
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    # Dead branch
+    :try_start
+    div-int/2addr p0, p1
+    goto :merge
+
+    # Live branch
+    :else
+    div-int/2addr p0, p2
+
+    # Merge block. Make complex so it does not get merged with the live branch.
+    :merge
+    div-int/2addr p0, p3
+    if-eqz p0, :else2
+    div-int/2addr p0, p3
+    :else2
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    const/4 p0, -0x1
+    goto :return
+
+.end method
+
+# Test that DCE removes catch phi uses of instructions defined in dead try blocks.
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-DAG:     <<Arg0:i\d+>>     ParameterValue
+## CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:     <<Add:i\d+>>      Add [<<Arg0>>,<<Arg1>>]
+## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:                       Phi [<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+.method public static testCatchPhiInputs_DefinedInTryBlock(IIII)I
+    .registers 7
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    shr-int/2addr p2, p3
+
+    :try_start
+    const v1, 0xa           # dead catch phi input, defined in entry block
+    add-int v2, p0, p1      # dead catch phi input, defined in the dead block
+    div-int/2addr p0, v2
+
+    :else
+    const v1, 0xb           # live catch phi input
+    const v2, 0xc           # live catch phi input
+    div-int/2addr p0, p3
+
+    const v1, 0xd           # live catch phi input
+    const v2, 0xe           # live catch phi input
+    div-int/2addr p0, p1
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    sub-int p0, v1, v2      # use catch phi values
+    goto :return
+
+.end method
+
+# Test that DCE does not remove catch phi uses of instructions defined outside
+# dead try blocks.
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (before)
+## CHECK-DAG:     <<Arg0:i\d+>>     ParameterValue
+## CHECK-DAG:     <<Arg1:i\d+>>     ParameterValue
+## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>> IntConstant 15
+## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination_final (after)
+## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
+## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
+## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
+## CHECK-DAG:     <<Const0xd:i\d+>> IntConstant 13
+## CHECK-DAG:     <<Const0xe:i\d+>> IntConstant 14
+## CHECK-DAG:     <<Const0xf:i\d+>> IntConstant 15
+## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
+## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
+
+.method public static testCatchPhiInputs_DefinedOutsideTryBlock(IIII)I
+    .registers 7
+
+    invoke-static {}, LTestCase;->$inline$False()Z
+    move-result v0
+
+    if-eqz v0, :else
+
+    shr-int/2addr p2, p3
+
+    :try_start
+    const v1, 0xa           # dead catch phi input, defined in entry block
+    const v2, 0xf           # dead catch phi input, defined in entry block
+    div-int/2addr p0, v2
+
+    :else
+    const v1, 0xb           # live catch phi input
+    const v2, 0xc           # live catch phi input
+    div-int/2addr p0, p3
+
+    const v1, 0xd           # live catch phi input
+    const v2, 0xe           # live catch phi input
+    div-int/2addr p0, p1
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :return
+    return p0
+
+    :catch_all
+    sub-int p0, v1, v2      # use catch phi values
+    goto :return
+
+.end method
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
new file mode 100644
index 0000000..6e73d0d
--- /dev/null
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  static boolean $inline$False() { return false; }
+
+  // DCE should only merge blocks where the first ends with a Goto.
+  // SSAChecker will fail if the following Throw->TryBoundary blocks are merged.
+  public static void doNotMergeThrow(String str) {
+    try {
+      throw new Exception(str);
+    } catch (Exception ex) {
+      return;
+    }
+  }
+
+  // Test deletion of all try/catch blocks. Multiple catch blocks exercise the
+  // case where a TryBoundary still has exception handler successors after some
+  // have already been removed.
+
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK-NOT: TryBoundary
+
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination_final (after)
+  /// CHECK: begin_block
+  /// CHECK: begin_block
+  /// CHECK: begin_block
+  /// CHECK-NOT: begin_block
+
+  public static void testDeadTryCatch(boolean val) {
+    if ($inline$False()) {
+      try {
+        if (val) {
+          throw new ArithmeticException();
+        } else {
+          throw new ArrayIndexOutOfBoundsException();
+        }
+      } catch (ArithmeticException ex) {
+        System.out.println("Unexpected AE catch");
+      } catch (ArrayIndexOutOfBoundsException ex) {
+        System.out.println("Unexpected AIIOB catch");
+      }
+    }
+  }
+
+  public static void main(String[] args) {
+
+  }
+}
\ No newline at end of file
diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc
new file mode 100644
index 0000000..4108323
--- /dev/null
+++ b/test/543-env-long-ref/env_long_ref.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/context.h"
+#include "art_method-inl.h"
+#include "jni.h"
+#include "scoped_thread_state_change.h"
+#include "stack.h"
+#include "thread.h"
+
+namespace art {
+
+namespace {
+
+class TestVisitor : public StackVisitor {
+ public:
+  TestVisitor(const ScopedObjectAccess& soa, Context* context, jobject expected_value)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(soa.Self(), context, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        expected_value_(expected_value),
+        found_(false),
+        soa_(soa) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if (m_name == "testCase") {
+      found_ = true;
+      uint32_t value = 0;
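+      // Vreg 1 is the register in which TestCase.smali keeps the allocated
+      // object (move-result-object v1), so it must still hold the reference.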
+      CHECK(GetVReg(m, 1, kReferenceVReg, &value));
+      CHECK_EQ(reinterpret_cast<mirror::Object*>(value),
+               soa_.Decode<mirror::Object*>(expected_value_));
+    }
+    return true;
+  }
+
+  jobject expected_value_;
+  bool found_;
+  const ScopedObjectAccess& soa_;
+};
+
+}  // namespace
+
+extern "C" JNIEXPORT void JNICALL Java_Main_lookForMyRegisters(JNIEnv*, jclass, jobject value) {
+  ScopedObjectAccess soa(Thread::Current());
+  std::unique_ptr<Context> context(Context::Create());
+  TestVisitor visitor(soa, context.get(), value);
+  visitor.WalkStack();
+  CHECK(visitor.found_);
+}
+
+}  // namespace art
diff --git a/test/455-set-vreg/expected.txt b/test/543-env-long-ref/expected.txt
similarity index 85%
rename from test/455-set-vreg/expected.txt
rename to test/543-env-long-ref/expected.txt
index 6a5618e..89f155b 100644
--- a/test/455-set-vreg/expected.txt
+++ b/test/543-env-long-ref/expected.txt
@@ -1 +1,2 @@
 JNI_OnLoad called
+42
diff --git a/test/543-env-long-ref/info.txt b/test/543-env-long-ref/info.txt
new file mode 100644
index 0000000..6a42533
--- /dev/null
+++ b/test/543-env-long-ref/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used to return the wrong
+dex register in debuggable mode when a new value overwrote the high dex
+register of a wide value.
diff --git a/test/543-env-long-ref/smali/TestCase.smali b/test/543-env-long-ref/smali/TestCase.smali
new file mode 100644
index 0000000..608d6eb
--- /dev/null
+++ b/test/543-env-long-ref/smali/TestCase.smali
@@ -0,0 +1,26 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static testCase()I
+  .registers 5
+  const-wide/16 v0, 0x1
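+  # The wide constant occupies the (v0, v1) register pair; the move-result-object
+  # below overwrites v1, the high half of the pair, with a reference, which is
+  # the situation this regression test exercises.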
+  invoke-static {v0, v1}, LMain;->$noinline$allocate(J)LMain;
+  move-result-object v1
+  invoke-static {v1}, LMain;->lookForMyRegisters(LMain;)V
+  iget v2, v1, LMain;->field:I
+  return v2
+.end method
diff --git a/test/543-env-long-ref/src/Main.java b/test/543-env-long-ref/src/Main.java
new file mode 100644
index 0000000..e723789
--- /dev/null
+++ b/test/543-env-long-ref/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Throwable {
+    System.loadLibrary(args[0]);
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod("testCase");
+    Integer a = (Integer)m.invoke(null, (Object[]) null);
+    System.out.println(a);
+  }
+
+  public static Main $noinline$allocate(long a) {
+    try {
+      return new Main();
+    } catch (Exception e) {
+      throw new Error(e);
+    }
+  }
+
+  public static native void lookForMyRegisters(Main m);
+
+  int field = 42;
+}
diff --git a/test/545-tracing-and-jit/expected.txt b/test/545-tracing-and-jit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/545-tracing-and-jit/expected.txt
diff --git a/test/545-tracing-and-jit/info.txt b/test/545-tracing-and-jit/info.txt
new file mode 100644
index 0000000..34e654e
--- /dev/null
+++ b/test/545-tracing-and-jit/info.txt
@@ -0,0 +1,2 @@
+Tests interaction between the JIT and the method tracing
+functionality.
diff --git a/test/545-tracing-and-jit/src/Main.java b/test/545-tracing-and-jit/src/Main.java
new file mode 100644
index 0000000..a2d51d5
--- /dev/null
+++ b/test/545-tracing-and-jit/src/Main.java
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+public class Main {
+    private static final String TEMP_FILE_NAME_PREFIX = "test";
+    private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+    private static File file;
+
+    public static void main(String[] args) throws Exception {
+        String name = System.getProperty("java.vm.name");
+        if (!"Dalvik".equals(name)) {
+            System.out.println("This test is not supported on " + name);
+            return;
+        }
+        file = createTempFile();
+        try {
+            new Main().ensureCaller(true, 0);
+            new Main().ensureCaller(false, 0);
+        } finally {
+            if (file != null) {
+              file.delete();
+            }
+        }
+    }
+
+    private static File createTempFile() throws Exception {
+        try {
+            return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+        } catch (IOException e) {
+            System.setProperty("java.io.tmpdir", "/data/local/tmp");
+            try {
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            } catch (IOException e2) {
+                System.setProperty("java.io.tmpdir", "/sdcard");
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            }
+        }
+    }
+
+    // We make sure 'doLoadsOfStuff' has a caller, because it is this caller that
+    // will be pushed into the side instrumentation frame.
+    public void ensureCaller(boolean warmup, int invocationCount) throws Exception {
+        doLoadsOfStuff(warmup, invocationCount);
+    }
+
+    // The number of recursive calls we make in 'doLoadsOfStuff' to ensure the
+    // JIT sees it as hot.
+    static final int NUMBER_OF_INVOCATIONS = 5;
+
+    public void doLoadsOfStuff(boolean warmup, int invocationCount) throws Exception {
+        // Warmup is to make sure the JIT gets a chance to compile 'doLoadsOfStuff'.
+        if (warmup) {
+            if (invocationCount < NUMBER_OF_INVOCATIONS) {
+                doLoadsOfStuff(warmup, ++invocationCount);
+            } else {
+                // Give the JIT a chance to compile.
+                Thread.sleep(1000);
+            }
+        } else {
+            if (invocationCount == 0) {
+                // When running the test in trace mode, there is already a trace running.
+                if (VMDebug.getMethodTracingMode() != 0) {
+                    VMDebug.stopMethodTracing();
+                }
+                VMDebug.startMethodTracing(file.getPath(), 0, 0, false, 0);
+            }
+            fillJit();
+            if (invocationCount < NUMBER_OF_INVOCATIONS) {
+                doLoadsOfStuff(warmup, ++invocationCount);
+            } else {
+                VMDebug.stopMethodTracing();
+            }
+        }
+    }
+
+    // This method creates enough profiling data to fill the code cache and trigger
+    // a collection in debug mode (at the time of the test, 10KB of data space). We
+    // used to crash by not looking at the instrumentation stack and deleting JIT
+    // code that would later be restored by the instrumentation.
+    public static void fillJit() throws Exception {
+        Map map = new HashMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new LinkedHashMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new TreeMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        map = new ConcurrentSkipListMap();
+        map.put("foo", "bar");
+        map.clear();
+        map.containsKey("foo");
+        map.containsValue("foo");
+        map.entrySet();
+        map.equals(map);
+        map.hashCode();
+        map.isEmpty();
+        map.keySet();
+        map.putAll(map);
+        map.remove("foo");
+        map.size();
+        map.put("bar", "foo");
+        map.values();
+
+        Set set = new HashSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+
+        set = new LinkedHashSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+
+        set = new TreeSet();
+        set.add("foo");
+        set.addAll(set);
+        set.clear();
+        set.contains("foo");
+        set.containsAll(set);
+        set.equals(set);
+        set.hashCode();
+        set.isEmpty();
+        set.iterator();
+        set.remove("foo");
+        set.removeAll(set);
+        set.retainAll(set);
+        set.size();
+        set.add("foo");
+        set.toArray();
+    }
+
+    private static class VMDebug {
+        private static final Method startMethodTracingMethod;
+        private static final Method stopMethodTracingMethod;
+        private static final Method getMethodTracingModeMethod;
+        static {
+            try {
+                Class c = Class.forName("dalvik.system.VMDebug");
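+                // dalvik.system.VMDebug is not part of the public API, hence the
+                // reflective access.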
+                startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+                        Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+                stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+                getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        public static void startMethodTracing(String filename, int bufferSize, int flags,
+                boolean samplingEnabled, int intervalUs) throws Exception {
+            startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+                    intervalUs);
+        }
+        public static void stopMethodTracing() throws Exception {
+            stopMethodTracingMethod.invoke(null);
+        }
+        public static int getMethodTracingMode() throws Exception {
+            return (int) getMethodTracingModeMethod.invoke(null);
+        }
+    }
+}
diff --git a/test/546-regression-simplify-catch/expected.txt b/test/546-regression-simplify-catch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/546-regression-simplify-catch/expected.txt
diff --git a/test/546-regression-simplify-catch/info.txt b/test/546-regression-simplify-catch/info.txt
new file mode 100644
index 0000000..b146e87
--- /dev/null
+++ b/test/546-regression-simplify-catch/info.txt
@@ -0,0 +1,2 @@
+Tests simplification of catch blocks in the presence of trivially dead code
+that was not verified by the verifier.
diff --git a/test/546-regression-simplify-catch/smali/TestCase.smali b/test/546-regression-simplify-catch/smali/TestCase.smali
new file mode 100644
index 0000000..486b3b0
--- /dev/null
+++ b/test/546-regression-simplify-catch/smali/TestCase.smali
@@ -0,0 +1,104 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test simplification of an empty, dead catch block. The compiler used to
+# segfault because it expected at least one control-flow instruction (b/25494450).
+
+.method public static testCase_EmptyCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    nop
+
+.end method
+
+# Test simplification of a dead catch block with some code but no control-flow
+# instruction.
+
+.method public static testCase_NoControlFlowCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    const v1, 0x3
+    add-int v0, v0, v1
+
+.end method
+
+# Test simplification of a dead catch block that has normal predecessors but
+# starts with a move-exception. The verifier does not check trivially dead code,
+# and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_InvalidLoadException()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    :catch
+    move-exception v0
+
+.end method
+
+# Test simplification of a live catch block that has dead normal predecessors
+# and starts with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_TriviallyDeadPredecessor(II)I
+    .registers 3
+
+    :try_start
+    div-int v0, p0, p1
+    return v0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    # Trivially dead predecessor block.
+    add-int p0, p0, p1
+
+    :catch
+    # This verifies because only exceptional predecessors are live.
+    move-exception v0
+    const v0, 0x0
+    return v0
+
+.end method
+
diff --git a/test/546-regression-simplify-catch/src/Main.java b/test/546-regression-simplify-catch/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/546-regression-simplify-catch/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/547-regression-trycatch-critical-edge/expected.txt b/test/547-regression-trycatch-critical-edge/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/expected.txt
diff --git a/test/547-regression-trycatch-critical-edge/info.txt b/test/547-regression-trycatch-critical-edge/info.txt
new file mode 100644
index 0000000..dc798c0
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/info.txt
@@ -0,0 +1,2 @@
+Test a specific SSA-building regression where a back edge would not be split
+due to being on a try/catch boundary.
\ No newline at end of file
diff --git a/test/547-regression-trycatch-critical-edge/smali/TestCase.smali b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
new file mode 100644
index 0000000..53a3cc5
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# The following test case would crash liveness analysis because the back edge of
+# the outer loop would have a smaller liveness position than the two back edges
+# of the inner loop. This was caused by a bug that did not split the critical
+# edge between the TryBoundary and the outer loop header (b/25493695).
+
+.method public static testCase(II)I
+  .registers 10
+
+  const v0, 0x0                                       # v0 = result
+  const v1, 0x1                                       # v1 = const 1
+
+  move v2, p0                                         # v2 = outer loop counter
+  :outer_loop
+  if-eqz v2, :return
+  sub-int/2addr v2, v1
+
+  :try_start
+
+  move v3, p1                                         # v3 = inner loop counter
+  :inner_loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  if-eqz v3, :outer_loop                              # back edge of outer loop
+  sub-int/2addr v3, v1
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  add-int/2addr v0, v1
+  goto :inner_loop                                    # back edge of inner loop
+
+  :try_end
+  .catchall {:try_start .. :try_end} :catch
+
+  :catch
+  const v4, 0x2
+  add-int/2addr v0, v4
+  goto :inner_loop                                    # back edge of inner loop
+
+  :return
+  return v0
+
+.end method
diff --git a/test/547-regression-trycatch-critical-edge/src/Main.java b/test/547-regression-trycatch-critical-edge/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/548-checker-inlining-and-dce/expected.txt b/test/548-checker-inlining-and-dce/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/548-checker-inlining-and-dce/expected.txt
diff --git a/test/548-checker-inlining-and-dce/info.txt b/test/548-checker-inlining-and-dce/info.txt
new file mode 100644
index 0000000..3255d6b
--- /dev/null
+++ b/test/548-checker-inlining-and-dce/info.txt
@@ -0,0 +1 @@
+Test that inlining works when code preventing inlining is eliminated by DCE.
diff --git a/test/548-checker-inlining-and-dce/src/Main.java b/test/548-checker-inlining-and-dce/src/Main.java
new file mode 100644
index 0000000..38fdcc0
--- /dev/null
+++ b/test/548-checker-inlining-and-dce/src/Main.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private void inlinedForNull(Iterable it) {
+    if (it != null) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  private void inlinedForFalse(boolean value, Iterable it) {
+    if (value) {
+      // We're not inlining invoke-interface at the moment.
+      it.iterator();
+    }
+  }
+
+  /// CHECK-START: void Main.testInlinedForFalseInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForFalseInlined(java.lang.Iterable) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+
+  public void testInlinedForFalseInlined(Iterable it) {
+    inlinedForFalse(false, it);
+  }
+
+  /// CHECK-START: void Main.testInlinedForFalseNotInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForFalseNotInlined(java.lang.Iterable) inliner (after)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  public void testInlinedForFalseNotInlined(Iterable it) {
+    inlinedForFalse(true, it);
+  }
+
+  /// CHECK-START: void Main.testInlinedForNullInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForNullInlined(java.lang.Iterable) inliner (after)
+  /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeInterface
+
+  public void testInlinedForNullInlined(Iterable it) {
+    inlinedForNull(null);
+  }
+
+  /// CHECK-START: void Main.testInlinedForNullNotInlined(java.lang.Iterable) inliner (before)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  /// CHECK-START: void Main.testInlinedForNullNotInlined(java.lang.Iterable) inliner (after)
+  /// CHECK:                          InvokeStaticOrDirect
+
+  public void testInlinedForNullNotInlined(Iterable it) {
+    inlinedForNull(it);
+  }
+
+  public static void main(String[] args) {
+    Main m = new Main();
+    Iterable it = new Iterable() {
+      public java.util.Iterator iterator() { return null; }
+    };
+    m.testInlinedForFalseInlined(it);
+    m.testInlinedForFalseNotInlined(it);
+    m.testInlinedForNullInlined(it);
+    m.testInlinedForNullNotInlined(it);
+  }
+}
diff --git a/test/549-checker-types-merge/expected.txt b/test/549-checker-types-merge/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/549-checker-types-merge/expected.txt
diff --git a/test/549-checker-types-merge/info.txt b/test/549-checker-types-merge/info.txt
new file mode 100644
index 0000000..f174e20
--- /dev/null
+++ b/test/549-checker-types-merge/info.txt
@@ -0,0 +1 @@
+Checker test for the type merge during reference type propagation.
diff --git a/test/549-checker-types-merge/src/Main.java b/test/549-checker-types-merge/src/Main.java
new file mode 100644
index 0000000..dc27f10
--- /dev/null
+++ b/test/549-checker-types-merge/src/Main.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// To make it easier to follow the tests:
+//  - all interfaces defined in this file extend InterfaceSuper (except InterfaceOtherSuper)
+//  - all classes defined in this file extend ClassSuper (except ClassOtherSuper)
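+//
+// The merge rules exercised by the checks below can be summarized as:
+//  - null merged with a reference keeps the reference's type
+//  - two classes merge to their most specific common superclass
+//  - a class merged with an interface it implements keeps the interface type
+//  - an interface merged with one of its superinterfaces keeps the superinterface
+//  - otherwise (unrelated class/interface, or two interfaces with no
+//    subtyping relation) the merge falls back to java.lang.Object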
+
+interface InterfaceSuper {}
+interface InterfaceOtherSuper {}
+
+interface InterfaceA extends InterfaceSuper {}
+interface InterfaceB extends InterfaceSuper {}
+interface InterfaceExtendsA extends InterfaceA {}
+interface InterfaceExtendsB extends InterfaceB {}
+
+class ClassSuper {}
+class ClassOtherSuper {}
+
+class ClassA extends ClassSuper {}
+class ClassB extends ClassSuper {}
+class ClassExtendsA extends ClassA {}
+class ClassExtendsB extends ClassB {}
+
+class ClassImplementsInterfaceA extends ClassSuper implements InterfaceA {}
+
+public class Main {
+
+  /// CHECK-START: java.lang.Object Main.testMergeNullConstant(boolean) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:Main
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeNullConstant(boolean cond) {
+    return cond ? null : new Main();
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassExtendsB) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassExtendsB b) {
+    // Different classes, have a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassExtendsA, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassExtendsA a, ClassSuper b) {
+    // Different classes, one is the super type of the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:ClassSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassSuper a, ClassSuper b) {
+    // Same classes.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClasses(boolean, ClassOtherSuper, ClassSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClasses(boolean cond, ClassOtherSuper a, ClassSuper b) {
+    // Different classes, have Object as the common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassImplementsInterfaceA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassImplementsInterfaceA a, InterfaceSuper b) {
+    // Class implements interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeClassWithInterface(boolean, ClassSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeClassWithInterface(boolean cond, ClassSuper a, InterfaceSuper b) {
+    // Class doesn't implement interface.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceSuper b) {
+    // Different interfaces, one extends the other.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:InterfaceSuper
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceSuper b) {
+    // Same interfaces.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceExtendsA, InterfaceExtendsB) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceExtendsA a, InterfaceExtendsB b) {
+    // Different interfaces with a common super type.
+    return cond ? a : b;
+  }
+
+  /// CHECK-START: java.lang.Object Main.testMergeInterfaces(boolean, InterfaceSuper, InterfaceOtherSuper) reference_type_propagation (after)
+  /// CHECK:      <<Phi:l\d+>>       Phi klass:java.lang.Object
+  /// CHECK:                         Return [<<Phi>>]
+  private Object testMergeInterfaces(boolean cond, InterfaceSuper a, InterfaceOtherSuper b) {
+    // Different interfaces.
+    return cond ? a : b;
+  }
+
+  public static void main(String[] args) {
+  }
+}
diff --git a/test/550-checker-multiply-accumulate/expected.txt b/test/550-checker-multiply-accumulate/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/expected.txt
diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/info.txt
@@ -0,0 +1 @@
+Test the merging of multiply and add/sub into multiply-accumulate instructions on arm64.
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
new file mode 100644
index 0000000..2d0688d
--- /dev/null
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -0,0 +1,234 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /**
+   * Test basic merging of `MUL+ADD` into `MULADD`.
+   */
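+  // On arm64, `madd Rd, Rn, Rm, Ra` computes Rd = Ra + Rn * Rm, so the
+  // accumulator, left and right operands map directly onto one instruction.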
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Add>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulAdd(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    return acc + left * right;
+  }
+
+  /**
+   * Test basic merging of `MUL+SUB` into `MULSUB`.
+   */
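+  // On arm64, `msub Rd, Rn, Rm, Ra` computes Rd = Ra - Rn * Rm, matching the
+  // `acc - left * right` pattern below.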
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:                            Return [<<Sub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulSub(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    return acc - left * right;
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction when there
+   * are other uses of the multiplication that cannot merge it.
+   */
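+  // Rationale: merging consumes the Mul, but the Or below still needs the
+  // multiplication result, so the Mul has to stay live either way and the
+  // merge would not remove any instruction.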
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Left:i\d+>>        ParameterValue
+  /// CHECK:       <<Right:i\d+>>       ParameterValue
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:i\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Or:i\d+>>          Or [<<Mul>>,<<Add>>]
+  /// CHECK:                            Return [<<Or>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+  public static int $opt$noinline$multipleUses1(int acc, int left, int right) {
+    if (doThrow) throw new Error();
+    int temp = left * right;
+    return temp | (acc + temp);
+  }
+
+  /**
+   * Test that we do not create a multiply-accumulate instruction even when all
+   * uses of the multiplication can merge it.
+   */
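+  // Even though both the Add and the Sub could individually absorb the Mul,
+  // doing so would duplicate the multiplication; the simplifier therefore
+  // keeps the single shared Mul (a conservative choice, not a correctness
+  // requirement).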
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Left:j\d+>>        ParameterValue
+  /// CHECK:       <<Right:j\d+>>       ParameterValue
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Left>>,<<Right>>]
+  /// CHECK:       <<Add:j\d+>>         Add [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Acc>>,<<Mul>>]
+  /// CHECK:       <<Res:j\d+>>         Add [<<Add>>,<<Sub>>]
+  /// CHECK:                            Return [<<Res>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64MultiplyAccumulate
+
+
+  public static long $opt$noinline$multipleUses2(long acc, long left, long right) {
+    if (doThrow) throw new Error();
+    long temp = left * right;
+    return (acc + temp) + (acc - temp);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (b + 1)` as `a + (a * b)`.
+   */
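+  // Since acc * (var + 1) == acc + acc * var, the accumulator can double as
+  // the left multiplicand and the expression still maps onto a single madd.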
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<Const1:i\d+>>      IntConstant 1
+  /// CHECK:       <<Add:i\d+>>         Add [<<Var>>,<<Const1>>]
+  /// CHECK:       <<Mul:i\d+>>         Mul [<<Acc>>,<<Add>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:i\d+>>         ParameterValue
+  /// CHECK:       <<Var:i\d+>>         ParameterValue
+  /// CHECK:       <<MulAdd:i\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add
+  /// CHECK:                            Return [<<MulAdd>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Add
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after)
+  /// CHECK:                            madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}}
+
+  public static int $opt$noinline$mulPlusOne(int acc, int var) {
+    if (doThrow) throw new Error();
+    return acc * (var + 1);
+  }
+
+
+  /**
+   * Test the interpretation of `a * (1 - b)` as `a - (a * b)`.
+   */
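+  // Since acc * (1 - var) == acc - acc * var, the expression maps onto a
+  // single msub with acc as both accumulator and left multiplicand.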
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<Const1:j\d+>>      LongConstant 1
+  /// CHECK:       <<Sub:j\d+>>         Sub [<<Const1>>,<<Var>>]
+  /// CHECK:       <<Mul:j\d+>>         Mul [<<Acc>>,<<Sub>>]
+  /// CHECK:                            Return [<<Mul>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<Acc:j\d+>>         ParameterValue
+  /// CHECK:       <<Var:j\d+>>         ParameterValue
+  /// CHECK:       <<MulSub:j\d+>>      Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub
+  /// CHECK:                            Return [<<MulSub>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Mul
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after)
+  /// CHECK:                            msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}}
+
+  public static long $opt$noinline$mulMinusOne(long acc, long var) {
+    if (doThrow) throw new Error();
+    return acc * (1 - var);
+  }
+
+
+  public static void main(String[] args) {
+    assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3));
+    assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6));
+    assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9));
+    assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12));
+    assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14));
+    assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16));
+  }
+}
diff --git a/test/550-checker-regression-wide-store/expected.txt b/test/550-checker-regression-wide-store/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-checker-regression-wide-store/expected.txt
diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt
new file mode 100644
index 0000000..6cf04bc
--- /dev/null
+++ b/test/550-checker-regression-wide-store/info.txt
@@ -0,0 +1,3 @@
+Test an SsaBuilder regression where storing into the high vreg of a pair
+would not invalidate the low vreg. The resulting environment would generate
+an incorrect stack map, causing deopt and try/catch to use a wrong location.
\ No newline at end of file
diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali
new file mode 100644
index 0000000..7974d56
--- /dev/null
+++ b/test/550-checker-regression-wide-store/smali/TestCase.smali
@@ -0,0 +1,82 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+.method public static $noinline$throw()V
+  .registers 1
+  new-instance v0, Ljava/lang/Exception;
+  invoke-direct {v0}, Ljava/lang/Exception;-><init>()V
+  throw v0
+.end method
+
+# Test storing into the high vreg of a wide pair. This scenario has runtime
+# behaviour implications, so we run it from Main.main.
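+# (In Dalvik bytecode a wide value occupies a pair of consecutive vregs;
+# `long-to-int v1, v0` below writes v1, the high half of the (v0, v1) pair,
+# which must invalidate the pair's wide value in the environment.)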
+
+## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after)
+## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>]
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]]
+## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw  env:[[_,<<Cast>>,<<Arg>>,_]]
+
+.method public static invalidateLow(J)I
+  .registers 4
+
+  const/4 v1, 0x0
+
+  :try_start
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  move-wide v0, p0
+  long-to-int v1, v0
+  invoke-static {}, LTestCase;->$noinline$throw()V
+  :try_end
+  .catchall {:try_start .. :try_end} :catchall
+
+  :catchall
+  return v1
+
+.end method
+
+# Test that storing a wide value invalidates the value in the high vreg. This
+# cannot be detected at runtime, so we only test the environment with Checker.
+
+## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]]
+
+.method public static invalidateHigh1(J)V
+  .registers 4
+
+  const/4 v1, 0x0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
+
+## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after)
+## CHECK-DAG: <<Arg:j\d+>>  ParameterValue
+## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]]
+
+.method public static invalidateHigh2(J)V
+  .registers 5
+
+  move-wide v1, p0
+  move-wide v0, p0
+  invoke-static {}, Ljava/lang/System;->nanoTime()J
+  return-void
+
+.end method
diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java
new file mode 100644
index 0000000..9b502df
--- /dev/null
+++ b/test/550-checker-regression-wide-store/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  private static int runTestCase(String name, long arg) throws Exception {
+    Class<?> c = Class.forName("TestCase");
+    Method m = c.getMethod(name, long.class);
+    int result = (Integer) m.invoke(null, arg);
+    return result;
+  }
+
+  private static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Wrong result: " + expected + " != " + actual);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    assertEquals(42, runTestCase("invalidateLow", 42L));
+  }
+}
diff --git a/test/550-new-instance-clinit/expected.txt b/test/550-new-instance-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/550-new-instance-clinit/expected.txt
diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt
new file mode 100644
index 0000000..c5fa3c7
--- /dev/null
+++ b/test/550-new-instance-clinit/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler, which used
+to treat HNewInstance as not having side effects even
+though it could invoke a clinit method.
diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java
new file mode 100644
index 0000000..45e259e
--- /dev/null
+++ b/test/550-new-instance-clinit/src/Main.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    int foo = Main.a;
+    new Bar();
+    foo = Main.a;
+    if (foo != 43) {
+      throw new Error("Expected 43, got " + foo);
+    }
+  }
+  static int a = 42;
+}
+
+class Bar {
+  static {
+    Main.a++;
+  }
+}
diff --git a/test/551-checker-clinit/expected.txt b/test/551-checker-clinit/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-clinit/expected.txt
diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt
new file mode 100644
index 0000000..4d54bb5
--- /dev/null
+++ b/test/551-checker-clinit/info.txt
@@ -0,0 +1 @@
+Checker test to ensure we optimize away HClinitChecks as expected.
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java
new file mode 100644
index 0000000..5ec30480
--- /dev/null
+++ b/test/551-checker-clinit/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {}
+  public static int foo = 42;
+
+  /// CHECK-START: void Main.inlinedMethod() builder (after)
+  /// CHECK:                        ClinitCheck
+
+  /// CHECK-START: void Main.inlinedMethod() inliner (after)
+  /// CHECK:                        ClinitCheck
+  /// CHECK-NOT:                    ClinitCheck
+  /// CHECK-NOT:                    InvokeStaticOrDirect
+  public void inlinedMethod() {
+    SubSub.bar();
+  }
+}
+
+class Sub extends Main {
+  /// CHECK-START: void Sub.invokeSuperClass() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeSuperClass() {
+    int a = Main.foo;
+  }
+
+  /// CHECK-START: void Sub.invokeItself() builder (after)
+  /// CHECK-NOT:                        ClinitCheck
+  public void invokeItself() {
+    int a = foo;
+  }
+
+  /// CHECK-START: void Sub.invokeSubClass() builder (after)
+  /// CHECK:                            ClinitCheck
+  public void invokeSubClass() {
+    int a = SubSub.foo;
+  }
+
+  public static int foo = 42;
+}
+
+class SubSub {
+  public static void bar() {
+    int a = Main.foo;
+  }
+  public static int foo = 42;
+}
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
new file mode 100644
index 0000000..18e8c59
--- /dev/null
+++ b/test/551-checker-shifter-operand/build
@@ -0,0 +1,212 @@
+#!/bin/bash
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This is an almost exact copy of `art/test/etc/default-build`. Only the
+# parsing of the `dx` options has been overridden.
+
+# Stop if something fails.
+set -e
+
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
+DX_FLAGS=""
+SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# Setup experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8"
+
+while true; do
+  if [ "x$1" = "x--dx-option" ]; then
+    shift
+    option="$1"
+    # Make sure we run this test *with* `dx` optimizations.
+    if [ "x$option" != "x--no-optimize" ]; then
+      DX_FLAGS="${DX_FLAGS} $option"
+    fi
+    shift
+  elif [ "x$1" = "x--jvm" ]; then
+    shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
+  elif expr "x$1" : "x--" >/dev/null 2>&1; then
+    echo "unknown $0 option: $1" 1>&2
+    exit 1
+  else
+    break
+  fi
+done
+
+# Add args from the experimental mappings.
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
+if [ -e classes.dex ]; then
+  zip $TEST_NAME.jar classes.dex
+  exit 0
+fi
+
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
+  # No src directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+  # Jack does not support this configuration unless we specify how to partition the DEX file
+  # with a .jpp file.
+  USE_JACK="false"
+fi
+
+if [ ${USE_JACK} = "true" ]; then
+  # Jack toolchain
+  if [ "${HAS_SRC}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src.jack src
+    imported_jack_files="--import src.jack"
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
+    imported_jack_files="--import src2.jack ${imported_jack_files}"
+  fi
+
+  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
+  # class definitions from src2 first.
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  fi
+else
+  # Legacy toolchain with javac+dx
+  if [ "${HAS_SRC}" = "true" ]; then
+    mkdir classes
+    ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+    mkdir classes2
+    ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
+        --dump-width=1000 ${DX_FLAGS} classes2
+    fi
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    mkdir -p classes
+    ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+        --dump-width=1000 ${DX_FLAGS} classes
+    fi
+  fi
+fi
+
+if [ "${HAS_SMALI}" = "true" ]; then
+  # Compile Smali classes
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
+fi
+
+if [ ${HAS_SRC_EX} = "true" ]; then
+  if [ ${USE_JACK} = "true" ]; then
+      # Rename previous "classes.dex" so it is not overwritten.
+      mv classes.dex classes-1.dex
+      # TODO: find another way to append src.jack to the jack classpath.
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
+      zip $TEST_NAME-ex.jar classes.dex
+      # Restore previous "classes.dex" so it can be zipped.
+      mv classes-1.dex classes.dex
+  else
+    mkdir classes-ex
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
+        --dump-width=1000 ${DX_FLAGS} classes-ex
+
+      # quick shuffle so that the stored name is "classes.dex"
+      mv classes.dex classes-1.dex
+      mv classes-ex.dex classes.dex
+      zip $TEST_NAME-ex.jar classes.dex
+      mv classes.dex classes-ex.dex
+      mv classes-1.dex classes.dex
+    fi
+  fi
+fi
+
+# Create a single jar with two dex files for multidex.
+if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex classes2.dex
+elif [ ${NEED_DEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex
+fi
diff --git a/test/551-checker-shifter-operand/expected.txt b/test/551-checker-shifter-operand/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-shifter-operand/expected.txt
diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/551-checker-shifter-operand/info.txt
@@ -0,0 +1 @@
+Test the merging of instructions into the shifter operand on arm64.
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
new file mode 100644
index 0000000..decdd1f
--- /dev/null
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -0,0 +1,678 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // Non-inlinable type-casting helpers.
+  static  char $noinline$byteToChar   (byte v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$byteToShort  (byte v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$byteToInt    (byte v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$byteToLong   (byte v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$charToByte   (char v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static short $noinline$charToShort  (char v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$charToInt    (char v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$charToLong   (char v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return  (char)v; }
+  static   int $noinline$shortToInt  (short v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$intToByte     (int v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$intToChar     (int v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$intToShort    (int v) { if (doThrow) throw new Error(); return (short)v; }
+  static  long $noinline$intToLong     (int v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$longToByte   (long v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$longToChar   (long v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$longToShort  (long v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$longToInt    (long v) { if (doThrow) throw new Error(); return   (int)v; }
+
+  /**
+   * Basic test merging a bitfield move operation (here a type conversion) into
+   * the shifter operand.
+   */
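+  // arm64 arithmetic instructions accept an extended-register operand, e.g.
+  // `sub x0, x1, w2, sxtb` sign-extends the low byte of w2 before the
+  // subtraction, so the TypeConversion folds into the Sub.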
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:       <<tmp:j\d+>>         TypeConversion [<<b>>]
+  /// CHECK:                            Sub [<<l>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after)
+  /// CHECK:                            sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb
+
+  public static long $opt$noinline$translate(long l, byte b) {
+    if (doThrow) throw new Error();
+    long tmp = (long)b;
+    return l - tmp;
+  }
+
+
+  /**
+   * Test that we do not merge into the shifter operand when the left and right
+   * inputs are the same IR node.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<a:i\d+>>           ParameterValue
+  /// CHECK:       <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<tmp>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<a:i\d+>>           ParameterValue
+  /// CHECK-DAG:   <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<Shl:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<Shl>>,<<Shl>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static int $opt$noinline$sameInput(int a) {
+    if (doThrow) throw new Error();
+    int tmp = a << 2;
+    return tmp + tmp;
+  }
+
+  /**
+   * Check that we perform the merge for multiple uses.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:       <<Const23:i\d+>>     IntConstant 23
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<arg>>,<<Const23>>]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Add
+
+  public static int $opt$noinline$multipleUses(int arg) {
+    if (doThrow) throw new Error();
+    int tmp = arg << 23;
+    switch (arg) {
+      case 1:  return (arg | 1) + tmp;
+      case 2:  return (arg | 2) + tmp;
+      case 3:  return (arg | 3) + tmp;
+      case 4:  return (arg | 4) + tmp;
+      case (1 << 20):  return (arg | 5) + tmp;
+      default: return 0;
+    }
+  }
+
+  /**
+   * Logical instructions cannot take 'extend' operations into the shifter
+   * operand, so test that only the shifts are merged.
+   */
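+  // arm64 AND/ORR/EOR have a shifted-register form but no extended-register
+  // form, so a shift can fold into the logical operation while a
+  // sign-/zero-extension cannot.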
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
+  /// CHECK:                            and lsl
+  /// CHECK:                            sxtb
+  /// CHECK:                            and
+
+  static void $opt$noinline$testAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)),
+                     (a & (b << 5)) | (a & (byte)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after)
+  /// CHECK:                            orr asr
+  /// CHECK:                            uxth
+  /// CHECK:                            orr
+
+  static void $opt$noinline$testOr(int a, int b) {
+    if (doThrow) throw new Error();
+    assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)),
+                    (a | (b >> 6)) | (a | (char)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after)
+  /// CHECK:                            eor lsr
+  /// CHECK:                            sxtw
+  /// CHECK:                            eor
+
+  static void $opt$noinline$testXor(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)),
+                     (a ^ (b >>> 7)) | (a ^ (int)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after)
+  /// CHECK:                            neg lsl
+  /// CHECK:                            sxth
+  /// CHECK:                            neg
+
+  static void $opt$noinline$testNeg(int a) {
+    if (doThrow) throw new Error();
+    assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a),
+                    (-(a << 8)) | (-(short)a));
+  }
+
+  /**
+   * The functions below are used to compare the result of optimized operations
+   * to non-optimized operations.
+   * On the left-hand side we use a non-inlined function call to ensure the
+   * optimization does not occur. The checker tests ensure that the optimization
+   * does occur on the right-hand side.
+   */
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteInt1(int a, byte b) {
+    assertIntEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendByteInt2(int a, byte b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$byteToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$byteToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteLong(long a, byte b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$byteToShort(b), a + (short)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$byteToInt  (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendByte(long a, byte b) {
+    $opt$validateExtendByteInt1((int)a, b);
+    $opt$validateExtendByteInt2((int)a, b);
+    $opt$validateExtendByteLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharInt1(int a, char b) {
+    assertIntEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertIntEquals(a + $noinline$charToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendCharInt2(int a, char b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$charToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$charToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharLong(long a, char b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$charToShort(b), a + (short)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$charToInt  (b), a +   (int)b);
+  }
+
+  public static void $opt$validateExtendChar(long a, char b) {
+    $opt$validateExtendCharInt1((int)a, b);
+    $opt$validateExtendCharInt2((int)a, b);
+    $opt$validateExtendCharLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortInt1(int a, short b) {
+    assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b);
+    assertIntEquals(a + $noinline$shortToChar (b), a + (char)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendShortInt2(int a, short b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$shortToInt  (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$shortToLong (b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortLong(long a, short b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b);
+    assertLongEquals(a + $noinline$shortToChar(b), a + (char)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$shortToInt (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendShort(long a, short b) {
+    $opt$validateExtendShortInt1((int)a, b);
+    $opt$validateExtendShortInt2((int)a, b);
+    $opt$validateExtendShortLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendInt(long a, int b) {
+    // All tests have a conversion to `long`. The first three tests also have a
+    // conversion from `int` to the specified type. For each test the conversion
+    // to `long` is merged into the shifter operand.
+    assertLongEquals(a + $noinline$intToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$intToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$intToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$intToLong (b), a +  (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendLong(long a, long b) {
+    // Each test performs two conversions: a narrowing conversion from `long`
+    // and a widening conversion back to `long`. The conversions back to
+    // `long` are merged into the shifter operand.
+    assertLongEquals(a + $noinline$longToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$longToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$longToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$longToInt  (b), a +   (int)b);
+  }
+
+
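+  // The 'doThrow' guard gives each helper a potential side effect; together with
+  // the $noinline$ prefix it keeps these shifts from being inlined at call sites.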
+  static int $noinline$IntShl(int b, int c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static int $noinline$IntShr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static int $noinline$IntUshr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftInt(int a, int b) {
+    assertIntEquals(a + $noinline$IntShl(b, 1),   a + (b <<  1));
+    assertIntEquals(a + $noinline$IntShl(b, 6),   a + (b <<  6));
+    assertIntEquals(a + $noinline$IntShl(b, 7),   a + (b <<  7));
+    assertIntEquals(a + $noinline$IntShl(b, 8),   a + (b <<  8));
+    assertIntEquals(a + $noinline$IntShl(b, 14),  a + (b << 14));
+    assertIntEquals(a + $noinline$IntShl(b, 15),  a + (b << 15));
+    assertIntEquals(a + $noinline$IntShl(b, 16),  a + (b << 16));
+    assertIntEquals(a + $noinline$IntShl(b, 30),  a + (b << 30));
+    assertIntEquals(a + $noinline$IntShl(b, 31),  a + (b << 31));
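+    // Int shift distances are masked to five bits (JLS 15.19), so the
+    // distances 32, 62 and 63 below behave as 0, 30 and 31.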
+    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << 32));
+    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << 62));
+    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << 63));
+
+    assertIntEquals(a - $noinline$IntShr(b, 1),   a - (b >>  1));
+    assertIntEquals(a - $noinline$IntShr(b, 6),   a - (b >>  6));
+    assertIntEquals(a - $noinline$IntShr(b, 7),   a - (b >>  7));
+    assertIntEquals(a - $noinline$IntShr(b, 8),   a - (b >>  8));
+    assertIntEquals(a - $noinline$IntShr(b, 14),  a - (b >> 14));
+    assertIntEquals(a - $noinline$IntShr(b, 15),  a - (b >> 15));
+    assertIntEquals(a - $noinline$IntShr(b, 16),  a - (b >> 16));
+    assertIntEquals(a - $noinline$IntShr(b, 30),  a - (b >> 30));
+    assertIntEquals(a - $noinline$IntShr(b, 31),  a - (b >> 31));
+    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> 32));
+    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> 62));
+    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> 63));
+
+    assertIntEquals(a ^ $noinline$IntUshr(b, 1),   a ^ (b >>>  1));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 6),   a ^ (b >>>  6));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 7),   a ^ (b >>>  7));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 8),   a ^ (b >>>  8));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 14),  a ^ (b >>> 14));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 15),  a ^ (b >>> 15));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 16),  a ^ (b >>> 16));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 30),  a ^ (b >>> 30));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 31),  a ^ (b >>> 31));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> 32));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> 62));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  static long $noinline$LongShl(long b, long c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static long $noinline$LongShr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static long $noinline$LongUshr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftLong(long a, long b) {
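+    // Long shift distances are masked to six bits (JLS 15.19), so every
+    // distance below (up to 63) is used as written.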
+    assertLongEquals(a + $noinline$LongShl(b, 1),   a + (b <<  1));
+    assertLongEquals(a + $noinline$LongShl(b, 6),   a + (b <<  6));
+    assertLongEquals(a + $noinline$LongShl(b, 7),   a + (b <<  7));
+    assertLongEquals(a + $noinline$LongShl(b, 8),   a + (b <<  8));
+    assertLongEquals(a + $noinline$LongShl(b, 14),  a + (b << 14));
+    assertLongEquals(a + $noinline$LongShl(b, 15),  a + (b << 15));
+    assertLongEquals(a + $noinline$LongShl(b, 16),  a + (b << 16));
+    assertLongEquals(a + $noinline$LongShl(b, 30),  a + (b << 30));
+    assertLongEquals(a + $noinline$LongShl(b, 31),  a + (b << 31));
+    assertLongEquals(a + $noinline$LongShl(b, 32),  a + (b << 32));
+    assertLongEquals(a + $noinline$LongShl(b, 62),  a + (b << 62));
+    assertLongEquals(a + $noinline$LongShl(b, 63),  a + (b << 63));
+
+    assertLongEquals(a - $noinline$LongShr(b, 1),   a - (b >>  1));
+    assertLongEquals(a - $noinline$LongShr(b, 6),   a - (b >>  6));
+    assertLongEquals(a - $noinline$LongShr(b, 7),   a - (b >>  7));
+    assertLongEquals(a - $noinline$LongShr(b, 8),   a - (b >>  8));
+    assertLongEquals(a - $noinline$LongShr(b, 14),  a - (b >> 14));
+    assertLongEquals(a - $noinline$LongShr(b, 15),  a - (b >> 15));
+    assertLongEquals(a - $noinline$LongShr(b, 16),  a - (b >> 16));
+    assertLongEquals(a - $noinline$LongShr(b, 30),  a - (b >> 30));
+    assertLongEquals(a - $noinline$LongShr(b, 31),  a - (b >> 31));
+    assertLongEquals(a - $noinline$LongShr(b, 32),  a - (b >> 32));
+    assertLongEquals(a - $noinline$LongShr(b, 62),  a - (b >> 62));
+    assertLongEquals(a - $noinline$LongShr(b, 63),  a - (b >> 63));
+
+    assertLongEquals(a ^ $noinline$LongUshr(b, 1),   a ^ (b >>>  1));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 6),   a ^ (b >>>  6));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 7),   a ^ (b >>>  7));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 8),   a ^ (b >>>  8));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 14),  a ^ (b >>> 14));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 15),  a ^ (b >>> 15));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 16),  a ^ (b >>> 16));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 30),  a ^ (b >>> 30));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 31),  a ^ (b >>> 31));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 32),  a ^ (b >>> 32));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 62),  a ^ (b >>> 62));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  public static void main(String[] args) {
+    assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3));
+    assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3));
+
+    assertIntEquals(4096, $opt$noinline$sameInput(512));
+    assertIntEquals(-8192, $opt$noinline$sameInput(-1024));
+
+    assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1));
+    assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20));
+
+    long[] inputs = {
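+      // Values straddling the byte, short/char, int and long boundaries,
+      // plus assorted other constants.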
+      -((1L <<  7) - 1L), -((1L <<  7)), -((1L <<  7) + 1L),
+      -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L),
+      -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L),
+      -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L),
+      -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L),
+      -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L),
+      -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L,
+      -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L,
+      0L,
+      1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L,
+      42L,  314L,  2718281828L,  0x123456789L,  0x987654321L,
+      (1L <<  7) - 1L, (1L <<  7), (1L <<  7) + 1L,
+      (1L <<  8) - 1L, (1L <<  8), (1L <<  8) + 1L,
+      (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L,
+      (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L,
+      (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L,
+      (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L,
+      (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L,
+      Long.MIN_VALUE, Long.MAX_VALUE
+    };
+    for (int i = 0; i < inputs.length; i++) {
+      $opt$noinline$testNeg((int)inputs[i]);
+      for (int j = 0; j < inputs.length; j++) {
+        $opt$noinline$testAnd(inputs[i], inputs[j]);
+        $opt$noinline$testOr((int)inputs[i], (int)inputs[j]);
+        $opt$noinline$testXor(inputs[i], inputs[j]);
+
+        $opt$validateExtendByte(inputs[i], (byte)inputs[j]);
+        $opt$validateExtendChar(inputs[i], (char)inputs[j]);
+        $opt$validateExtendShort(inputs[i], (short)inputs[j]);
+        $opt$validateExtendInt(inputs[i], (int)inputs[j]);
+        $opt$validateExtendLong(inputs[i], inputs[j]);
+
+        $opt$validateShiftInt((int)inputs[i], (int)inputs[j]);
+        $opt$validateShiftLong(inputs[i], inputs[j]);
+      }
+    }
+
+  }
+}
diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-implicit-null-checks/expected.txt
diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt
new file mode 100644
index 0000000..bdd066b
--- /dev/null
+++ b/test/551-implicit-null-checks/info.txt
@@ -0,0 +1 @@
+Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file
diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java
new file mode 100644
index 0000000..677e8d3
--- /dev/null
+++ b/test/551-implicit-null-checks/src/Main.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  private class Inner {
+    private long i1;
+  }
+  private Inner inst;
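+  // Note: 'inst' is never assigned, so it stays null and the field accesses
+  // below are expected to fault.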
+
+  public static void main(String[] args) throws Exception {
+    Main m = new Main();
+    try {
+      m.$opt$noinline$testGetLong();
+    } catch (NullPointerException ex) {
+      // good
+    }
+    try {
+      m.$opt$noinline$testPutLong(778899112233L);
+    } catch (NullPointerException ex) {
+      // good
+    }
+  }
+
+  public void $opt$noinline$testGetLong() throws Exception {
+    long result = inst.i1;
+    throw new Exception();  // prevent inline
+  }
+
+  public void $opt$noinline$testPutLong(long a) throws Exception {
+    inst.i1 = a;
+    throw new Exception();  // prevent inline
+  }
+}
diff --git a/test/551-invoke-super/expected.txt b/test/551-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
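+    # SuperClass declares returnInt() as abstract, so this call must throw
+    # AbstractMethodError at runtime.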
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
diff --git a/test/552-checker-sharpening/expected.txt b/test/552-checker-sharpening/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..d50edd8
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+  }
+}
diff --git a/test/552-invoke-non-existent-super/expected.txt b/test/552-invoke-non-existent-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
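+    # Here SuperClass does not declare returnInt() at all, so resolution is
+    # expected to fail with NoSuchMethodError.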
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
diff --git a/test/553-invoke-super/expected.txt b/test/553-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on this class, to confuse runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
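+    // invoke-super in run() must dispatch to SuperClass.$noinline$returnInt(),
+    // which returns 42, not InvokeSuper's own version returning 777.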
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 17c1f00..a590cf1 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -46,4 +46,5 @@
 b/23300986 (2)
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
+b/25494456
 Done!
diff --git a/test/800-smali/smali/b_25494456.smali b/test/800-smali/smali/b_25494456.smali
new file mode 100644
index 0000000..0675b27
--- /dev/null
+++ b/test/800-smali/smali/b_25494456.smali
@@ -0,0 +1,14 @@
+.class public LB25494456;
+
+.super Ljava/lang/Object;
+
+# Ensure that a type mismatch (integral/float vs reference) overrides a soft failure (because of
+# an unresolvable type) in return-object.
+
+.method public static run()Lwont/be/Resolvable;
+    .registers 1
+
+    const/4 v0, 1
+    return-object v0
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f75747d..4844848 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -137,6 +137,8 @@
                 new Object[] { new Object() }, null, null));
         testCases.add(new TestCase("b/23502994 (check-cast)", "B23502994", "runCHECKCAST",
                 new Object[] { "abc" }, null, null));
+        testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {
diff --git a/test/960-default-smali/build b/test/960-default-smali/build
index 3946de3..b72afcd 100755
--- a/test/960-default-smali/build
+++ b/test/960-default-smali/build
@@ -20,18 +20,19 @@
 # Generate the smali Main.smali file or fail
 ${ANDROID_BUILD_TOP}/art/test/utils/python/generate_smali_main.py ./smali
 
-USES_JAVA="false"
-if [[ $ARGS == *"--jvm"* ]]; then
-  USES_JAVA="true"
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 >& /dev/null; then
-    USES_JAVA="true"
+    USES_JAVA_SOURCE="true"
   else
     echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
   fi
 fi
 
-if [[ "$USES_JAVA" == "true" ]]; then
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
   # We are compiling java code, create it.
   mkdir -p src
   ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build
index 03cc624..005f76c 100755
--- a/test/961-default-iface-resolution-generated/build
+++ b/test/961-default-iface-resolution-generated/build
@@ -31,18 +31,19 @@
 # Generate the smali files and expected.txt or fail
 ./util-src/generate_smali.py ./smali ./expected.txt
 
-USES_JAVA="false"
-if [[ $ARGS == *"--jvm"* ]]; then
-  USES_JAVA="true"
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
 elif [[ $USE_JACK == "true" ]]; then
   if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then
-    USES_JAVA="true"
+    USES_JAVA_SOURCE="true"
   else
     echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
   fi
 fi
 
-if [[ "$USES_JAVA" == "true" ]]; then
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
   # We are compiling java code, create it.
   mkdir -p src
   ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
diff --git a/test/962-iface-static/build b/test/962-iface-static/build
index 24e2feb..e17272f 100755
--- a/test/962-iface-static/build
+++ b/test/962-iface-static/build
@@ -17,18 +17,19 @@
 # make us exit on a failure
 set -e
 
-USES_JAVA="false"
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
 if [[ $@ == *"--jvm"* ]]; then
-  USES_JAVA="true"
+  USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
-    USES_JAVA="true"
+    USES_JAVA_SOURCE="true"
   else
     echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
   fi
 fi
 
-if [[ "$USES_JAVA" == "true" ]]; then
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
   # We are compiling java code, create it.
   mkdir -p src
   ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
diff --git a/test/963-default-range-smali/build b/test/963-default-range-smali/build
index 24e2feb..e17272f 100755
--- a/test/963-default-range-smali/build
+++ b/test/963-default-range-smali/build
@@ -17,18 +17,19 @@
 # make us exit on a failure
 set -e
 
-USES_JAVA="false"
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
 if [[ $@ == *"--jvm"* ]]; then
-  USES_JAVA="true"
+  USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
-    USES_JAVA="true"
+    USES_JAVA_SOURCE="true"
   else
     echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
   fi
 fi
 
-if [[ "$USES_JAVA" == "true" ]]; then
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
   # We are compiling java code, create it.
   mkdir -p src
   ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
diff --git a/test/964-default-iface-init-generated/build b/test/964-default-iface-init-generated/build
index d916f1b..0780da1 100755
--- a/test/964-default-iface-init-generated/build
+++ b/test/964-default-iface-init-generated/build
@@ -29,18 +29,19 @@
 # Generate the smali files and expected.txt or fail
 ./util-src/generate_smali.py ./smali ./expected.txt
 
-USES_JAVA="false"
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
 if [[ $@ == *"--jvm"* ]]; then
-  USES_JAVA="true"
+  USES_JAVA_SOURCE="true"
 elif [[ "$USE_JACK" == "true" ]]; then
   if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
-    USES_JAVA="true"
+    USES_JAVA_SOURCE="true"
   else
     echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
   fi
 fi
 
-if [[ "$USES_JAVA" == "true" ]]; then
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
   # We are compiling java code, create it.
   mkdir -p src
   ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
diff --git a/test/964-default-iface-init-generated/util-src/generate_smali.py b/test/964-default-iface-init-generated/util-src/generate_smali.py
index 3c138ab..c0ba157 100755
--- a/test/964-default-iface-init-generated/util-src/generate_smali.py
+++ b/test/964-default-iface-init-generated/util-src/generate_smali.py
@@ -477,12 +477,12 @@
       ifaces = []
       for sub in split:
         ifaces.append(list(create_interface_trees(sub)))
-    for supers in itertools.product(*ifaces):
-      yield TestClass(clone_all(supers))
-      for i in range(len(set(dump_tree(supers)) - set(supers))):
-        ns = clone_all(supers)
-        selected = sorted(set(dump_tree(ns)) - set(ns))[i]
-        yield TestClass(tuple([selected] + list(ns)))
+      for supers in itertools.product(*ifaces):
+        yield TestClass(clone_all(supers))
+        for i in range(len(set(dump_tree(supers)) - set(supers))):
+          ns = clone_all(supers)
+          selected = sorted(set(dump_tree(ns)) - set(ns))[i]
+          yield TestClass(tuple([selected] + list(ns)))
 
 def create_interface_trees(num):
   """
diff --git a/test/965-default-verify/build b/test/965-default-verify/build
new file mode 100755
index 0000000..5ba5438
--- /dev/null
+++ b/test/965-default-verify/build
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# Should we compile with Java source code? By default we will use Smali.
+USES_JAVA_SOURCE="false"
+if [[ $@ == *"--jvm"* ]]; then
+  USES_JAVA_SOURCE="true"
+elif [[ "$USE_JACK" == "true" ]]; then
+  if $JACK -D jack.java.source.version=1.8 2>/dev/null; then
+    USES_JAVA_SOURCE="true"
+  else
+    echo "WARNING: Cannot use jack because it does not support JLS 1.8. Falling back to smali" >&2
+  fi
+fi
+
+if [[ "$USES_JAVA_SOURCE" == "true" ]]; then
+  # We are compiling Java code, create it.
+  mkdir -p src
+  mkdir -p src2
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Move the build-src files to src and the extracted src copies to src2. This
+  # is needed because of how our default build script works, and because we
+  # want the Java code embedded in the smali files to match the smali itself.
+  for f in `find ./build-src -type f -name "*.java" | xargs -i basename \{\}`; do
+    mv ./src/$f ./src2/$f
+    mv ./build-src/$f ./src/$f
+  done
+  # Ignore the smali directory.
+  EXTRA_ARGS="--no-smali"
+fi
+
+./default-build "$@" $EXTRA_ARGS --experimental default-methods
diff --git a/test/965-default-verify/build-src/Statics.java b/test/965-default-verify/build-src/Statics.java
new file mode 100644
index 0000000..300aeec
--- /dev/null
+++ b/test/965-default-verify/build-src/Statics.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Statics {
+  public static void nonexistantFunction() {
+      System.out.println("I don't exist");
+  }
+}
+
diff --git a/test/965-default-verify/expected.txt b/test/965-default-verify/expected.txt
new file mode 100644
index 0000000..b31314f
--- /dev/null
+++ b/test/965-default-verify/expected.txt
@@ -0,0 +1,15 @@
+Create Main instance
+Calling functions on concrete Main
+Calling verifiable function on Main
+Hello
+Calling unverifiable function on Main
+Expected NSME Thrown on Main
+Calling verifiable function on Main
+Hello
+Calling functions on interface Iface
+Calling verifiable function on Iface
+Hello
+Calling unverifiable function on Iface
+Expected NSME Thrown on Iface
+Calling verifiable function on Iface
+Hello
diff --git a/test/965-default-verify/info.txt b/test/965-default-verify/info.txt
new file mode 100644
index 0000000..2ccabf5
--- /dev/null
+++ b/test/965-default-verify/info.txt
@@ -0,0 +1,8 @@
+Smali-based tests for verification interaction with experimental interface
+default methods.
+
+build-src contains Java files that are needed if you compile with javac, since
+javac is much more proactive than smali about catching likely runtime errors.
+
+To run with --jvm you must set JAVA_HOME to a Java 8 installation and pass the
+--use-java-home flag to run-test.
diff --git a/test/965-default-verify/run b/test/965-default-verify/run
new file mode 100755
index 0000000..8944ea9
--- /dev/null
+++ b/test/965-default-verify/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/965-default-verify/smali/Iface.smali b/test/965-default-verify/smali/Iface.smali
new file mode 100644
index 0000000..74799a6
--- /dev/null
+++ b/test/965-default-verify/smali/Iface.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default String sayHi() {
+#       return "Hello";
+#   }
+#
+#   public default void verificationSoftFail() {
+#       Statics.nonexistantFunction();
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hello"
+    return-object v0
+.end method
+
+.method public verificationSoftFail()V
+    .locals 1
+    invoke-static {}, LStatics;->nonexistantFunction()V
+    return-void
+.end method
diff --git a/test/965-default-verify/smali/Main.smali b/test/965-default-verify/smali/Main.smali
new file mode 100644
index 0000000..8e90706
--- /dev/null
+++ b/test/965-default-verify/smali/Main.smali
@@ -0,0 +1,179 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface {
+#   public static void main(String[] args) {
+#       System.out.println("Create Main instance");
+#       Main m = new Main();
+#       System.out.println("Calling functions on concrete Main");
+#       callMain(m);
+#       System.out.println("Calling functions on interface Iface");
+#       callIface(m);
+#   }
+#
+#   public static void callMain(Main m) {
+#       System.out.println("Calling verifiable function on Main");
+#       System.out.println(m.sayHi());
+#       System.out.println("Calling unverifiable function on Main");
+#       try {
+#           m.verificationSoftFail();
+#           System.out.println("Unexpected no error Thrown on Main");
+#       } catch (NoSuchMethodError e) {
+#           System.out.println("Expected NSME Thrown on Main");
+#       } catch (Throwable e) {
+#           System.out.println("Unexpected Error Thrown on Main");
+#           e.printStackTrace(System.out);
+#       }
+#       System.out.println("Calling verifiable function on Main");
+#       System.out.println(m.sayHi());
+#       return;
+#   }
+#
+#   public static void callIface(Iface m) {
+#       System.out.println("Calling verifiable function on Iface");
+#       System.out.println(m.sayHi());
+#       System.out.println("Calling unverifiable function on Iface");
+#       try {
+#           m.verificationSoftFail();
+#           System.out.println("Unexpected no error Thrown on Iface");
+#       } catch (NoSuchMethodError e) {
+#           System.out.println("Expected NSME Thrown on Iface");
+#       } catch (Throwable e) {
+#           System.out.println("Unexpected Error Thrown on Iface");
+#           e.printStackTrace(System.out);
+#       }
+#       System.out.println("Calling verifiable function on Iface");
+#       System.out.println(m.sayHi());
+#       return;
+#   }
+# }
+
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    const-string v0, "Create Main instance"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    new-instance v2, LMain;
+    invoke-direct {v2}, LMain;-><init>()V
+
+    const-string v0, "Calling functions on concrete Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callMain(LMain;)V
+
+    const-string v0, "Calling functions on interface Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callIface(LIface;)V
+
+    return-void
+.end method
+
+.method public static callIface(LIface;)V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling verifiable function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->sayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling unverifiable function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-interface {p0}, LIface;->verificationSoftFail()V
+
+        const-string v0, "Unexpected no error Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/NoSuchMethodError; {:try_start .. :try_end} :NSME_error_start
+    .catch Ljava/lang/Throwable; {:try_start .. :try_end} :other_error_start
+    :NSME_error_start
+        const-string v0, "Expected NSME Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :other_error_start
+        move-exception v2
+        const-string v0, "Unexpected Error Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        invoke-virtual {v2,v1}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling verifiable function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->sayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
+
+.method public static callMain(LMain;)V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling verifiable function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->sayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling unverifiable function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-virtual {p0}, LMain;->verificationSoftFail()V
+
+        const-string v0, "Unexpected no error Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/NoSuchMethodError; {:try_start .. :try_end} :NSME_error_start
+    .catch Ljava/lang/Throwable; {:try_start .. :try_end} :other_error_start
+    :NSME_error_start
+        const-string v0, "Expected NSME Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :other_error_start
+        move-exception v2
+        const-string v0, "Unexpected Error Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        invoke-virtual {v2,v1}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling verifiable function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->sayHi()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
diff --git a/test/965-default-verify/smali/Statics.smali b/test/965-default-verify/smali/Statics.smali
new file mode 100644
index 0000000..1e8cac0
--- /dev/null
+++ b/test/965-default-verify/smali/Statics.smali
@@ -0,0 +1,30 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Statics {
+#   // public static void nonexistantFunction() {
+#   //     System.out.println("I don't exist");
+#   // }
+# }
+#
+.class public LStatics;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/966-default-conflict/build b/test/966-default-conflict/build
new file mode 100755
index 0000000..e66e840
--- /dev/null
+++ b/test/966-default-conflict/build
@@ -0,0 +1,34 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p src
+  mkdir -p classes
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Build with the non-conflicting version
+  ${JAVAC} -implicit:none -d classes src/Iface.java build-src/Iface2.java src/Main.java
+  rm classes/Iface2.class
+  # Build with the conflicting version
+  ${JAVAC} -implicit:none -cp classes -d classes src/Iface2.java
+else
+  ./default-build "$@" --experimental default-methods
+fi
diff --git a/test/966-default-conflict/build-src/Iface2.java b/test/966-default-conflict/build-src/Iface2.java
new file mode 100644
index 0000000..8d97df8
--- /dev/null
+++ b/test/966-default-conflict/build-src/Iface2.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// We extend Iface so that javac will not complain that Iface2 fails to declare a sayHi
+// method, or that it would soft-conflict on sayHi if it did declare one.
+public interface Iface2 extends Iface {
+  // public default String sayHi() {
+  //   return "hello";
+  // }
+}
+
+
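The two-phase build above amounts to this hedged Java sketch (two separate javac passes, as in the 966 build script; not a single compilable unit):

    // Pass 1: everything compiles cleanly against the non-conflicting Iface2.
    interface Iface { default String sayHi() { return "Hi"; } }
    interface Iface2 extends Iface { }        // build-src version
    class Main implements Iface, Iface2 { }

    // Pass 2: recompile only Iface2 with its own default, then drop the new
    // class file over the old one (rm classes/Iface2.class beforehand).
    interface Iface2 { default String sayHi() { return "hello"; } }
    // At runtime Main sees two unrelated default sayHi() methods, so any call
    // to sayHi() through Main throws IncompatibleClassChangeError.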
diff --git a/test/966-default-conflict/expected.txt b/test/966-default-conflict/expected.txt
new file mode 100644
index 0000000..fad2c25
--- /dev/null
+++ b/test/966-default-conflict/expected.txt
@@ -0,0 +1,18 @@
+Create Main instance
+Calling functions on concrete Main
+Calling non-conflicting function on Main
+CHARGE
+Calling conflicting function on Main
+Expected ICCE Thrown on Main
+Calling non-conflicting function on Main
+CHARGE
+Calling functions on interface Iface
+Calling non-conflicting function on Iface
+CHARGE
+Calling conflicting function on Iface
+Expected ICCE Thrown on Iface
+Calling non-conflicting function on Iface
+CHARGE
+Calling functions on interface Iface2
+Calling conflicting function on Iface2
+Expected ICCE Thrown on Iface2
diff --git a/test/966-default-conflict/info.txt b/test/966-default-conflict/info.txt
new file mode 100644
index 0000000..2b67657
--- /dev/null
+++ b/test/966-default-conflict/info.txt
@@ -0,0 +1,6 @@
+Smali-based tests for experimental interface default methods.
+
+Tests handling of default method conflicts.
+
+To run with --jvm you must export JAVA_HOME to point at a Java 8 installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/966-default-conflict/run b/test/966-default-conflict/run
new file mode 100755
index 0000000..8944ea9
--- /dev/null
+++ b/test/966-default-conflict/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/966-default-conflict/smali/Iface.smali b/test/966-default-conflict/smali/Iface.smali
new file mode 100644
index 0000000..e996b3a
--- /dev/null
+++ b/test/966-default-conflict/smali/Iface.smali
@@ -0,0 +1,39 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default String sayHi() {
+#       return "Hi";
+#   }
+#   public default String charge() {
+#       return "CHARGE";
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hi"
+    return-object v0
+.end method
+
+.method public charge()Ljava/lang/String;
+    .locals 1
+    const-string v0, "CHARGE"
+    return-object v0
+.end method
diff --git a/test/966-default-conflict/smali/Iface2.smali b/test/966-default-conflict/smali/Iface2.smali
new file mode 100644
index 0000000..82fa547
--- /dev/null
+++ b/test/966-default-conflict/smali/Iface2.smali
@@ -0,0 +1,31 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface2 {
+#   public default String sayHi() {
+#     return "hello";
+#   }
+# }
+
+.class public abstract interface LIface2;
+.super Ljava/lang/Object;
+
+.method public sayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "hello"
+    return-object v0
+.end method
+
diff --git a/test/966-default-conflict/smali/Main.smali b/test/966-default-conflict/smali/Main.smali
new file mode 100644
index 0000000..ce974d8
--- /dev/null
+++ b/test/966-default-conflict/smali/Main.smali
@@ -0,0 +1,227 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface, Iface2 {
+#   public static void main(String[] args) {
+#       System.out.println("Create Main instance");
+#       Main m = new Main();
+#       System.out.println("Calling functions on concrete Main");
+#       callMain(m);
+#       System.out.println("Calling functions on interface Iface");
+#       callIface(m);
+#       System.out.println("Calling functions on interface Iface2");
+#       callIface2(m);
+#   }
+#
+#   public static void callMain(Main m) {
+#       System.out.println("Calling non-conflicting function on Main");
+#       System.out.println(m.charge());
+#       System.out.println("Calling conflicting function on Main");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Main");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Unexpected AME Thrown on Main");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Expected ICCE Thrown on Main");
+#       }
+#       System.out.println("Calling non-conflicting function on Main");
+#       System.out.println(m.charge());
+#       return;
+#   }
+#
+#   public static void callIface(Iface m) {
+#       System.out.println("Calling non-conflicting function on Iface");
+#       System.out.println(m.charge());
+#       System.out.println("Calling conflicting function on Iface");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Iface");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Unexpected AME Thrown on Iface");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Expected ICCE Thrown on Iface");
+#       }
+#       System.out.println("Calling non-conflicting function on Iface");
+#       System.out.println(m.charge());
+#       return;
+#   }
+#
+#   public static void callIface2(Iface2 m) {
+#       System.out.println("Calling conflicting function on Iface2");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Iface2");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Unexpected AME Thrown on Iface2");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Expected ICCE Thrown on Iface2");
+#       }
+#       return;
+#   }
+# }
+
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+.implements LIface2;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    const-string v0, "Create Main instance"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    new-instance v2, LMain;
+    invoke-direct {v2}, LMain;-><init>()V
+
+    const-string v0, "Calling functions on concrete Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callMain(LMain;)V
+
+    const-string v0, "Calling functions on interface Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callIface(LIface;)V
+
+    const-string v0, "Calling functions on interface Iface2"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callIface2(LIface2;)V
+
+    return-void
+.end method
+
+.method public static callIface(LIface;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling non-conflicting function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling conflicting function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-interface {p0}, LIface;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Unexpected AME Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Expected ICCE Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling non-conflicting function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
+
+.method public static callIface2(LIface2;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling conflicting function on Iface2"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-interface {p0}, LIface2;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Unexpected AME Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Expected ICCE Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+
+    return-void
+.end method
+
+.method public static callMain(LMain;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling non-conflicting function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling conflicting function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-virtual {p0}, LMain;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Unexpected AME Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Expected ICCE Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling non-conflicting function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
diff --git a/test/967-default-ame/build b/test/967-default-ame/build
new file mode 100755
index 0000000..53001a9
--- /dev/null
+++ b/test/967-default-ame/build
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p src
+  mkdir -p classes
+  ${ANDROID_BUILD_TOP}/art/tools/extract-embedded-java ./smali ./src
+  # Build with the non-conflicting version
+  ${JAVAC} -implicit:none -d classes src/Iface.java build-src/Iface2.java build-src/Iface3.java src/Main.java
+  rm classes/Iface2.class
+  rm classes/Iface3.class
+  # Build with the conflicting version
+  ${JAVAC} -implicit:none -cp classes -d classes src/Iface2.java src/Iface3.java
+else
+  ./default-build "$@" --experimental default-methods
+fi
diff --git a/test/967-default-ame/build-src/Iface2.java b/test/967-default-ame/build-src/Iface2.java
new file mode 100644
index 0000000..55b2ac0
--- /dev/null
+++ b/test/967-default-ame/build-src/Iface2.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface2 extends Iface {
+  // public String sayHi();
+}
+
+
diff --git a/test/967-default-ame/build-src/Iface3.java b/test/967-default-ame/build-src/Iface3.java
new file mode 100644
index 0000000..a6faa45
--- /dev/null
+++ b/test/967-default-ame/build-src/Iface3.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface Iface3 {
+  // public String charge();
+}
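As with test 966, the build for 967 is two javac passes; a hedged Java sketch of the effective result:

    // Pass 1: Main compiles cleanly against the permissive build-src interfaces.
    interface Iface {
      default String sayHi() { return "Hi"; }
      default String charge() { return "CHARGE"; }
    }
    interface Iface2 extends Iface { }   // build-src version: declares nothing
    class Main implements Iface, Iface2 { }

    // Pass 2: recompile Iface2 alone, re-abstracting sayHi().
    interface Iface2 extends Iface { String sayHi(); }
    // Iface2's abstract sayHi() is now more specific than Iface's default, so
    // m.sayHi() throws AbstractMethodError. charge() still resolves to Iface's
    // default and prints "CHARGE": the abstract charge() added to the
    // unrelated Iface3 does not mask it.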
diff --git a/test/967-default-ame/expected.txt b/test/967-default-ame/expected.txt
new file mode 100644
index 0000000..cbd4ad3
--- /dev/null
+++ b/test/967-default-ame/expected.txt
@@ -0,0 +1,18 @@
+Create Main instance
+Calling functions on concrete Main
+Calling non-abstract function on Main
+CHARGE
+Calling abstract function on Main
+Expected AME Thrown on Main
+Calling non-abstract function on Main
+CHARGE
+Calling functions on interface Iface
+Calling non-abstract function on Iface
+CHARGE
+Calling abstract function on Iface
+Expected AME Thrown on Iface
+Calling non-abstract function on Iface
+CHARGE
+Calling functions on interface Iface2
+Calling abstract function on Iface2
+Expected AME Thrown on Iface2
diff --git a/test/967-default-ame/info.txt b/test/967-default-ame/info.txt
new file mode 100644
index 0000000..a346a32
--- /dev/null
+++ b/test/967-default-ame/info.txt
@@ -0,0 +1,6 @@
+Smali-based tests for experimental interface default methods.
+
+Tests handling of default method overrides.
+
+To run with --jvm you must export JAVA_HOME to point at a Java 8 installation
+and pass the --use-java-home flag to run-test.
diff --git a/test/967-default-ame/run b/test/967-default-ame/run
new file mode 100755
index 0000000..8944ea9
--- /dev/null
+++ b/test/967-default-ame/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/967-default-ame/smali/Iface.smali b/test/967-default-ame/smali/Iface.smali
new file mode 100644
index 0000000..e996b3a
--- /dev/null
+++ b/test/967-default-ame/smali/Iface.smali
@@ -0,0 +1,39 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface {
+#   public default String sayHi() {
+#       return "Hi";
+#   }
+#   public default String charge() {
+#       return "CHARGE";
+#   }
+# }
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
+
+.method public sayHi()Ljava/lang/String;
+    .locals 1
+    const-string v0, "Hi"
+    return-object v0
+.end method
+
+.method public charge()Ljava/lang/String;
+    .locals 1
+    const-string v0, "CHARGE"
+    return-object v0
+.end method
diff --git a/test/967-default-ame/smali/Iface2.smali b/test/967-default-ame/smali/Iface2.smali
new file mode 100644
index 0000000..a21a8dd
--- /dev/null
+++ b/test/967-default-ame/smali/Iface2.smali
@@ -0,0 +1,27 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface2 extends Iface {
+#   public String sayHi();
+# }
+
+.class public abstract interface LIface2;
+.super Ljava/lang/Object;
+.implements LIface;
+
+.method public abstract sayHi()Ljava/lang/String;
+.end method
+
diff --git a/test/967-default-ame/smali/Iface3.smali b/test/967-default-ame/smali/Iface3.smali
new file mode 100644
index 0000000..874e96d
--- /dev/null
+++ b/test/967-default-ame/smali/Iface3.smali
@@ -0,0 +1,26 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public interface Iface3 {
+#   public String charge();
+# }
+
+.class public abstract interface LIface3;
+.super Ljava/lang/Object;
+
+.method public abstract charge()Ljava/lang/String;
+.end method
+
diff --git a/test/967-default-ame/smali/Main.smali b/test/967-default-ame/smali/Main.smali
new file mode 100644
index 0000000..e4d63cf
--- /dev/null
+++ b/test/967-default-ame/smali/Main.smali
@@ -0,0 +1,228 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# class Main implements Iface, Iface2, Iface3 {
+#   public static void main(String[] args) {
+#       System.out.println("Create Main instance");
+#       Main m = new Main();
+#       System.out.println("Calling functions on concrete Main");
+#       callMain(m);
+#       System.out.println("Calling functions on interface Iface");
+#       callIface(m);
+#       System.out.println("Calling functions on interface Iface2");
+#       callIface2(m);
+#   }
+#
+#   public static void callMain(Main m) {
+#       System.out.println("Calling non-abstract function on Main");
+#       System.out.println(m.charge());
+#       System.out.println("Calling abstract function on Main");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Main");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Expected AME Thrown on Main");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Unexpected ICCE Thrown on Main");
+#       }
+#       System.out.println("Calling non-abstract function on Main");
+#       System.out.println(m.charge());
+#       return;
+#   }
+#
+#   public static void callIface(Iface m) {
+#       System.out.println("Calling non-abstract function on Iface");
+#       System.out.println(m.charge());
+#       System.out.println("Calling abstract function on Iface");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Iface");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Expected AME Thrown on Iface");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Unexpected ICCE Thrown on Iface");
+#       }
+#       System.out.println("Calling non-abstract function on Iface");
+#       System.out.println(m.charge());
+#       return;
+#   }
+#
+#   public static void callIface2(Iface2 m) {
+#       System.out.println("Calling abstract function on Iface2");
+#       try {
+#           System.out.println(m.sayHi());
+#           System.out.println("Unexpected no error Thrown on Iface2");
+#       } catch (AbstractMethodError e) {
+#           System.out.println("Expected AME Thrown on Iface2");
+#       } catch (IncompatibleClassChangeError e) {
+#           System.out.println("Unexpected ICCE Thrown on Iface2");
+#       }
+#       return;
+#   }
+# }
+
+.class public LMain;
+.super Ljava/lang/Object;
+.implements LIface;
+.implements LIface2;
+.implements LIface3;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public static main([Ljava/lang/String;)V
+    .locals 3
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    const-string v0, "Create Main instance"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    new-instance v2, LMain;
+    invoke-direct {v2}, LMain;-><init>()V
+
+    const-string v0, "Calling functions on concrete Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callMain(LMain;)V
+
+    const-string v0, "Calling functions on interface Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callIface(LIface;)V
+
+    const-string v0, "Calling functions on interface Iface2"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    invoke-static {v2}, LMain;->callIface2(LIface2;)V
+
+    return-void
+.end method
+
+.method public static callIface(LIface;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling non-abstract function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling abstract function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-interface {p0}, LIface;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Expected AME Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Unexpected ICCE Thrown on Iface"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling non-abstract function on Iface"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-interface {p0}, LIface;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
+
+.method public static callIface2(LIface2;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling abstract function on Iface2"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-interface {p0}, LIface2;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Expected AME Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Unexpected ICCE Thrown on Iface2"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+
+    return-void
+.end method
+
+.method public static callMain(LMain;)V
+    .locals 2
+    sget-object v1, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    const-string v0, "Calling non-abstract function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    const-string v0, "Calling abstract function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+    :try_start
+        invoke-virtual {p0}, LMain;->sayHi()Ljava/lang/String;
+        move-result-object v0
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        const-string v0, "Unexpected no error Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+        goto :error_end
+    :try_end
+    .catch Ljava/lang/AbstractMethodError; {:try_start .. :try_end} :AME_error_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :ICCE_error_start
+    :AME_error_start
+        const-string v0, "Expected AME Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :ICCE_error_start
+        const-string v0, "Unexpected ICCE Thrown on Main"
+        invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+        goto :error_end
+    :error_end
+    const-string v0, "Calling non-abstract function on Main"
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    invoke-virtual {p0}, LMain;->charge()Ljava/lang/String;
+    move-result-object v0
+    invoke-virtual {v1,v0}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+
+    return-void
+.end method
diff --git a/test/968-default-partial-compile-generated/build b/test/968-default-partial-compile-generated/build
new file mode 100755
index 0000000..1e9f8aa
--- /dev/null
+++ b/test/968-default-partial-compile-generated/build
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+# We will be making more files than the ulimit is set to allow. Remove it temporarily.
+OLD_ULIMIT=`ulimit -S`
+ulimit -S unlimited
+
+restore_ulimit() {
+  ulimit -S "$OLD_ULIMIT"
+}
+trap 'restore_ulimit' ERR
+
+# TODO: Support running with jack.
+
+if [[ $@ == *"--jvm"* ]]; then
+  # Build the Java files if we are running a --jvm test
+  mkdir -p classes
+  mkdir -p src
+  echo "${JAVAC} \$@" >> ./javac_exec.sh
+  # This will use javac_exec.sh to execute the javac compiler. It will place the
+  # compiled class files in ./classes and the expected values in expected.txt
+  #
+  # After this the src directory will contain the final versions of all files.
+  ./util-src/generate_java.py ./javac_exec.sh ./src ./classes ./expected.txt ./build_log
+else
+  mkdir -p ./smali
+  # Generate the smali files and expected.txt or fail
+  ./util-src/generate_smali.py ./smali ./expected.txt
+  # Use the default build script
+  ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
+fi
+
+# Reset the ulimit back to its initial value
+restore_ulimit
diff --git a/test/968-default-partial-compile-generated/expected.txt b/test/968-default-partial-compile-generated/expected.txt
new file mode 100644
index 0000000..1ddd65d
--- /dev/null
+++ b/test/968-default-partial-compile-generated/expected.txt
@@ -0,0 +1 @@
+This file is generated by util-src/generate_smali.py; do not modify it directly!
diff --git a/test/968-default-partial-compile-generated/info.txt b/test/968-default-partial-compile-generated/info.txt
new file mode 100644
index 0000000..bc1c428
--- /dev/null
+++ b/test/968-default-partial-compile-generated/info.txt
@@ -0,0 +1,17 @@
+Smali-based tests for experimental interface default methods.
+
+This tests that interface method resolution order is correct in the presence of
+partial compilation/illegal invokes.
+
+Obviously needs to run under ART or a Java 8 runtime and compiler.
+
+When run, the smali test files are generated by the util-src/generate_smali.py
+script. If run with --jvm, the util-src/generate_java.py script is used instead
+to generate equivalent Java code based on the smali code.
+
+Care should be taken when updating the generate_smali.py script. It should
+always produce equivalent output when run multiple times, and the expected
+output it generates should remain valid.
+
+Do not modify the expected.txt file. It is generated on each run by
+util-src/generate_smali.py.
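Each generated test ultimately has the same shape; a hedged Java sketch of one generated check, where D stands in for a generated concrete class:

    // Instantiate one generated class and record which error, if any, its
    // getName() call raises under partial compilation.
    static void test_D() {
      D v;
      try {
        v = new D();
      } catch (Throwable e) {
        System.out.println("Unexpected error occurred while creating D instance");
        e.printStackTrace(System.out);
        return;
      }
      try {
        System.out.printf("D calls %s\n", v.getName());
      } catch (AbstractMethodError e) {
        System.out.println("D threw AbstractMethodError");
      } catch (NoSuchMethodError e) {
        System.out.println("D threw NoSuchMethodError");
      } catch (IncompatibleClassChangeError e) {
        System.out.println("D threw IncompatibleClassChangeError");
      } catch (Throwable e) {
        e.printStackTrace(System.out);
      }
    }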
diff --git a/test/968-default-partial-compile-generated/run b/test/968-default-partial-compile-generated/run
new file mode 100755
index 0000000..6d2930d
--- /dev/null
+++ b/test/968-default-partial-compile-generated/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+${RUN} "$@" --experimental default-methods
diff --git a/test/968-default-partial-compile-generated/util-src/generate_java.py b/test/968-default-partial-compile-generated/util-src/generate_java.py
new file mode 100755
index 0000000..35290ef
--- /dev/null
+++ b/test/968-default-partial-compile-generated/util-src/generate_java.py
@@ -0,0 +1,134 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 968.
+"""
+
+import generate_smali as base
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+import testgen.mixins as mixins
+import functools
+import operator
+import subprocess
+
+class JavaConverter(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A class that can convert a SmaliFile to a JavaFile.
+  """
+  def __init__(self, inner):
+    self.inner = inner
+
+  def get_name(self):
+    """Gets the name of this file."""
+    return self.inner.get_name()
+
+  def __str__(self):
+    out = ""
+    for line in str(self.inner).splitlines(keepends = True):
+      if line.startswith("#"):
+        out += line[1:]
+    return out
+
+class Compiler:
+  def __init__(self, sources, javac, temp_dir, classes_dir):
+    self.javac = javac
+    self.temp_dir = temp_dir
+    self.classes_dir = classes_dir
+    self.sources = sources
+
+  def compile_files(self, args, files):
+    """
+    Compile the files given with the arguments given.
+    """
+    args = args.split()
+    files = list(map(str, files))
+    cmd = ['sh', '-a', '-e', '--', str(self.javac)] + args + files
+    print("Running compile command: {}".format(cmd))
+    subprocess.check_call(cmd)
+    print("Compiled {} files".format(len(files)))
+
+  def execute(self):
+    """
+    Compiles this test, doing partial compilation as necessary.
+    """
+    # Compile Main and all classes first. Force all interfaces to be default so that there will be
+    # no compiler problems (works since classes only implement 1 interface).
+    for f in self.sources:
+      if isinstance(f, base.TestInterface):
+        JavaConverter(f.get_specific_version(base.InterfaceType.default)).dump(self.temp_dir)
+      else:
+        JavaConverter(f).dump(self.temp_dir)
+    self.compile_files("-d {}".format(self.classes_dir), self.temp_dir.glob("*.java"))
+
+    # Now we compile the interfaces
+    ifaces = set(i for i in self.sources if isinstance(i, base.TestInterface))
+    while len(ifaces) != 0:
+      # Find those ifaces where there are no (uncompiled) interfaces that are subtypes.
+      tops = set(filter(lambda a: not any(map(lambda i: a in i.get_super_types(), ifaces)), ifaces))
+      files = []
+      # Dump these ones, they are getting compiled.
+      for f in tops:
+        out = JavaConverter(f)
+        out.dump(self.temp_dir)
+        files.append(self.temp_dir / out.get_file_name())
+      # Force all superinterfaces of these to be empty so there will be no conflicts
+      overrides = functools.reduce(operator.or_, map(lambda i: i.get_super_types(), tops), set())
+      for overridden in overrides:
+        out = JavaConverter(overridden.get_specific_version(base.InterfaceType.empty))
+        out.dump(self.temp_dir)
+        files.append(self.temp_dir / out.get_file_name())
+      self.compile_files("-d {outdir} -cp {outdir}".format(outdir = self.classes_dir), files)
+      # Remove these from the set of interfaces to be compiled.
+      ifaces -= tops
+    print("Finished compiling all files.")
+    return
+
+def main(argv):
+  javac_exec = Path(argv[1])
+  if not javac_exec.exists() or not javac_exec.is_file():
+    print("{} is not a shell script".format(javac_exec), file=sys.stderr)
+    sys.exit(1)
+  temp_dir = Path(argv[2])
+  if not temp_dir.exists() or not temp_dir.is_dir():
+    print("{} is not a valid source dir".format(temp_dir), file=sys.stderr)
+    sys.exit(1)
+  classes_dir = Path(argv[3])
+  if not classes_dir.exists() or not classes_dir.is_dir():
+    print("{} is not a valid classes directory".format(classes_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[4])
+  mainclass, all_files = base.create_all_test_files()
+
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  print("Wrote expected output")
+
+  Compiler(all_files, javac_exec, temp_dir, classes_dir).execute()
+
+if __name__ == '__main__':
+  main(sys.argv)
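The compile ordering in Compiler.execute() above is leaf-first over the interface tree; a hedged Java sketch of the same loop, where Iface and compileWithEmptySupers are hypothetical stand-ins:

    import java.util.*;
    import java.util.function.Consumer;
    import java.util.stream.Collectors;

    class CompileOrder {
      interface Iface { Set<Iface> getSuperTypes(); }

      static void compile(Set<Iface> all, Consumer<Set<Iface>> compileWithEmptySupers) {
        Set<Iface> remaining = new HashSet<>(all);
        while (!remaining.isEmpty()) {
          // "tops": interfaces that no remaining interface lists as a supertype.
          Set<Iface> tops = remaining.stream()
              .filter(a -> remaining.stream()
                  .noneMatch(i -> i.getSuperTypes().contains(a)))
              .collect(Collectors.toSet());
          // Compile these against empty stubs of their superinterfaces so no
          // conflicts arise, mirroring the Python above.
          compileWithEmptySupers.accept(tops);
          remaining.removeAll(tops);
        }
      }
    }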
diff --git a/test/968-default-partial-compile-generated/util-src/generate_smali.py b/test/968-default-partial-compile-generated/util-src/generate_smali.py
new file mode 100755
index 0000000..9855bcf
--- /dev/null
+++ b/test/968-default-partial-compile-generated/util-src/generate_smali.py
@@ -0,0 +1,607 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Smali test files for test 968.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+from enum import Enum
+from functools import total_ordering
+import itertools
+import string
+
+# The max depth the type tree can have.
+MAX_IFACE_DEPTH = 3
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.SmaliFileMixin):
+  """
+  A Main.smali file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+
+.class public LMain;
+.super Ljava/lang/Object;
+
+# class Main {{
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+{test_funcs}
+
+{main_func}
+
+# }}
+"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+#   public static void main(String[] args) {{
+.method public static main([Ljava/lang/String;)V
+    .locals 0
+
+    {test_group_invoke}
+
+    return-void
+.end method
+#   }}
+"""
+
+  TEST_GROUP_INVOKE_TEMPLATE = """
+#     {test_name}();
+    invoke-static {{}}, {test_name}()V
+"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def get_expected(self):
+    """
+    Get the expected output of this test.
+    """
+    all_tests = sorted(self.tests)
+    return filter_blanks("\n".join(a.get_expected() for a in all_tests))
+
+  def add_test(self, ty):
+    """
+    Add a test for the concrete type 'ty'
+    """
+    self.tests.add(Func(ty))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass smali code.
+    """
+    all_tests = sorted(self.tests)
+    test_invoke = ""
+    test_funcs = ""
+    for t in all_tests:
+      test_funcs += str(t)
+    for t in all_tests:
+      test_invoke += self.TEST_GROUP_INVOKE_TEMPLATE.format(test_name=t.get_name())
+    main_func = self.MAIN_FUNCTION_TEMPLATE.format(test_group_invoke=test_invoke)
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("smali"),
+                                           test_funcs = test_funcs,
+                                           main_func = main_func)
+
+class Func(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that tests the functionality of a concrete type. Should only be
+  constructed by MainClass.add_test.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+#   public static void {fname}() {{
+#     {farg} v = null;
+#     try {{
+#       v = new {farg}();
+#     }} catch (Throwable e) {{
+#       System.out.println("Unexpected error occurred which creating {farg} instance");
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#     try {{
+#       System.out.printf("{tree} calls %s\\n", v.getName());
+#       return;
+#     }} catch (AbstractMethodError e) {{
+#       System.out.println("{tree} threw AbstractMethodError");
+#     }} catch (NoSuchMethodError e) {{
+#       System.out.println("{tree} threw NoSuchMethodError");
+#     }} catch (IncompatibleClassChangeError e) {{
+#       System.out.println("{tree} threw IncompatibleClassChangeError");
+#     }} catch (Throwable e) {{
+#       e.printStackTrace(System.out);
+#       return;
+#     }}
+#   }}
+.method public static {fname}()V
+    .locals 7
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+
+    :new_{fname}_try_start
+      new-instance v0, L{farg};
+      invoke-direct {{v0}}, L{farg};-><init>()V
+      goto :call_{fname}_try_start
+    :new_{fname}_try_end
+    .catch Ljava/lang/Throwable; {{:new_{fname}_try_start .. :new_{fname}_try_end}} :new_error_{fname}_start
+    :new_error_{fname}_start
+      move-exception v6
+      const-string v5, "Unexpected error occurred which creating {farg} instance"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+    :call_{fname}_try_start
+      const/4 v1, 1
+      new-array v2,v1, [Ljava/lang/Object;
+      const/4 v1, 0
+      invoke-virtual {{v0}}, L{farg};->getName()Ljava/lang/String;
+      move-result-object v3
+      aput-object v3,v2,v1
+
+      const-string v5, "{tree} calls %s\\n"
+
+      invoke-virtual {{v4,v5,v2}}, Ljava/io/PrintStream;->printf(Ljava/lang/String;[Ljava/lang/Object;)Ljava/io/PrintStream;
+      return-void
+    :call_{fname}_try_end
+    .catch Ljava/lang/AbstractMethodError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :AME_{fname}_start
+    .catch Ljava/lang/NoSuchMethodError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :NSME_{fname}_start
+    .catch Ljava/lang/IncompatibleClassChangeError; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :ICCE_{fname}_start
+    .catch Ljava/lang/Throwable; {{:call_{fname}_try_start .. :call_{fname}_try_end}} :error_{fname}_start
+    :AME_{fname}_start
+      const-string v5, "{tree} threw AbstractMethodError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :NSME_{fname}_start
+      const-string v5, "{tree} threw NoSuchMethodError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :ICCE_{fname}_start
+      const-string v5, "{tree} threw IncompatibleClassChangeError"
+      invoke-virtual {{v4,v5}}, Ljava/io/PrintStream;->println(Ljava/lang/Object;)V
+      return-void
+    :error_{fname}_start
+      move-exception v6
+      invoke-virtual {{v6,v4}}, Ljava/lang/Throwable;->printStackTrace(Ljava/io/PrintStream;)V
+      return-void
+.end method
+"""
+
+  NSME_RESULT_TEMPLATE = "{tree} threw NoSuchMethodError"
+  ICCE_RESULT_TEMPLATE = "{tree} threw IncompatibleClassChangeError"
+  AME_RESULT_TEMPLATE = "{tree} threw AbstractMethodError"
+  NORMAL_RESULT_TEMPLATE = "{tree} calls {result}"
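+
+  # These four templates correspond to the possible outcomes of
+  # farg.get_called() as mapped in get_expected() below: an empty resolution
+  # is expected to throw NoSuchMethodError, an abstract one
+  # AbstractMethodError, a conflict IncompatibleClassChangeError, and a
+  # default resolution prints the tree of the interface actually invoked.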
+
+  def __init__(self, farg):
+    """
+    Initialize a test function for the given argument
+    """
+    self.farg = farg
+
+  def get_expected(self):
+    """
+    Get the expected output calling this function.
+    """
+    exp = self.farg.get_called()
+    if exp.is_empty():
+      return self.NSME_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    elif exp.is_abstract():
+      return self.AME_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    elif exp.is_conflict():
+      return self.ICCE_RESULT_TEMPLATE.format(tree = self.farg.get_tree())
+    else:
+      assert exp.is_default()
+      return self.NORMAL_RESULT_TEMPLATE.format(tree = self.farg.get_tree(),
+                                                result = exp.get_tree())
+
+  def get_name(self):
+    """
+    Get the name of this function
+    """
+    return "TEST_FUNC_{}".format(self.farg.get_name())
+
+  def __str__(self):
+    """
+    Print the smali code of this function.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(tree = self.farg.get_tree(),
+                                              fname = self.get_name(),
+                                              farg = self.farg.get_name())
+
+class TestClass(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  A class that will be instantiated to test default method resolution order.
+  """
+
+  TEST_CLASS_TEMPLATE = """{copyright}
+
+.class public L{class_name};
+.super Ljava/lang/Object;
+.implements L{iface_name};
+
+# public class {class_name} implements {iface_name} {{
+
+.method public constructor <init>()V
+  .registers 1
+  invoke-direct {{p0}}, Ljava/lang/Object;-><init>()V
+  return-void
+.end method
+
+{funcs}
+
+# }}
+"""
+
+  def __init__(self, iface):
+    """
+    Initialize this test class which implements the given interface
+    """
+    self.iface = iface
+    self.class_name = "CLASS_"+gensym()
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iface_tree}]".format(class_name = self.class_name,
+                                                iface_tree = self.iface.get_tree())
+
+  def __iter__(self):
+    """
+    Step through all interfaces implemented transitively by this class
+    """
+    yield self.iface
+    yield from self.iface
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this class is invoked or
+    CONFLICT_TYPE if there is no interface that will be called.
+    """
+    return self.iface.get_called()
+
+  def __str__(self):
+    """
+    Print the smali code of this class.
+    """
+    return self.TEST_CLASS_TEMPLATE.format(copyright = get_copyright('smali'),
+                                           iface_name = self.iface.get_name(),
+                                           tree = self.get_tree(),
+                                           class_name = self.class_name,
+                                           funcs = "")
+
+class InterfaceType(Enum):
+  """
+  An enumeration of all the different types of interfaces we can have.
+
+  default: It has a default method
+  abstract: It has a method declared but not defined
+  empty: It does not have the method
+  """
+  default = 0
+  abstract = 1
+  empty = 2
+
+  def get_suffix(self):
+    if self == InterfaceType.default:
+      return "_DEFAULT"
+    elif self == InterfaceType.abstract:
+      return "_ABSTRACT"
+    elif self == InterfaceType.empty:
+      return "_EMPTY"
+    else:
+      raise TypeError("Interface type had illegal value.")
+
+class ConflictInterface:
+  """
+  A singleton representing a conflict of default methods.
+  """
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return True
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return False
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return False
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return False
+
+CONFLICT_TYPE = ConflictInterface()
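+# Sentinel singleton: the get_called() implementations below return
+# CONFLICT_TYPE whenever two unrelated default methods compete for a call.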
+
+class TestInterface(mixins.DumpMixin, mixins.Named, mixins.NameComparableMixin, mixins.SmaliFileMixin):
+  """
+  An interface that will be used to test default method resolution order.
+  """
+
+  TEST_INTERFACE_TEMPLATE = """{copyright}
+.class public abstract interface L{class_name};
+.super Ljava/lang/Object;
+{implements_spec}
+
+# public interface {class_name} {extends} {ifaces} {{
+
+{funcs}
+
+# }}
+"""
+
+  DEFAULT_FUNC_TEMPLATE = """
+#   public default String getName() {{
+#     return "{tree}";
+#   }}
+.method public getName()Ljava/lang/String;
+  .locals 1
+  const-string v0, "{tree}"
+  return-object v0
+.end method
+"""
+
+  ABSTRACT_FUNC_TEMPLATE = """
+#   public String getName();
+.method public abstract getName()Ljava/lang/String;
+.end method
+"""
+
+  EMPTY_FUNC_TEMPLATE = """"""
+
+  IMPLEMENTS_TEMPLATE = """
+.implements L{iface_name};
+"""
+
+  def __init__(self, ifaces, iface_type, full_name = None):
+    """
+    Initialize interface with the given super-interfaces
+    """
+    self.ifaces = sorted(ifaces)
+    self.iface_type = iface_type
+    if full_name is None:
+      end = self.iface_type.get_suffix()
+      self.class_name = "INTERFACE_"+gensym()+end
+    else:
+      self.class_name = full_name
+
+  def get_specific_version(self, v):
+    """
+    Returns a copy of this interface of the given type for use in partial compilation.
+    """
+    return TestInterface(self.ifaces, v, full_name = self.class_name)
+
+  def get_super_types(self):
+    """
+    Returns a set of all the supertypes of this interface
+    """
+    return set(i2 for i2 in self)
+
+  def is_conflict(self):
+    """
+    Returns true if this is a conflict interface and calling the method on this interface will
+    result in an IncompatibleClassChangeError.
+    """
+    return False
+
+  def is_abstract(self):
+    """
+    Returns true if this is an abstract interface and calling the method on this interface will
+    result in an AbstractMethodError.
+    """
+    return self.iface_type == InterfaceType.abstract
+
+  def is_empty(self):
+    """
+    Returns true if this is an empty interface and calling the method on this interface will
+    result in a NoSuchMethodError.
+    """
+    return self.iface_type == InterfaceType.empty
+
+  def is_default(self):
+    """
+    Returns true if this is a default interface and calling the method on this interface will
+    result in a method actually being called.
+    """
+    return self.iface_type == InterfaceType.default
+
+  def get_called(self):
+    """
+    Returns the interface that will be called when the method on this interface is invoked,
+    or CONFLICT_TYPE if the call would result in a default method conflict.
+    """
+    if not self.is_empty() or len(self.ifaces) == 0:
+      return self
+    else:
+      best = self
+      for super_iface in self.ifaces:
+        super_best = super_iface.get_called()
+        if super_best.is_conflict():
+          return CONFLICT_TYPE
+        elif best.is_default():
+          if super_best.is_default():
+            return CONFLICT_TYPE
+        elif best.is_abstract():
+          if super_best.is_default():
+            best = super_best
+        else:
+          assert best.is_empty()
+          best = super_best
+      return best
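+
+  # Illustrative walk-through of get_called() (a sketch, not executed): for an
+  # empty interface E whose supers sort as [A (abstract), D (default)], best
+  # starts as E, becomes A at the first super, then D because a default wins
+  # over an abstract, so E resolves to D. Two defaults among the supers would
+  # yield CONFLICT_TYPE instead.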
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return self.class_name
+
+  def get_tree(self):
+    """
+    Print out a representation of the type tree of this class
+    """
+    return "[{class_name} {iftree}]".format(class_name = self.get_name(),
+                                            iftree = print_tree(self.ifaces))
+
+  def __iter__(self):
+    """
+    Performs depth-first traversal of the interface tree this interface is the
+    root of. Does not filter out repeats.
+    """
+    for i in self.ifaces:
+      yield i
+      yield from i
+
+  def __str__(self):
+    """
+    Print the smali code of this interface.
+    """
+    s_ifaces = " "
+    j_ifaces = " "
+    for i in self.ifaces:
+      s_ifaces += self.IMPLEMENTS_TEMPLATE.format(iface_name = i.get_name())
+      j_ifaces += " {},".format(i.get_name())
+    j_ifaces = j_ifaces[0:-1]
+    if self.is_default():
+      funcs = self.DEFAULT_FUNC_TEMPLATE.format(tree = self.get_tree())
+    elif self.is_abstract():
+      funcs = self.ABSTRACT_FUNC_TEMPLATE.format()
+    else:
+      funcs = ""
+    return self.TEST_INTERFACE_TEMPLATE.format(copyright = get_copyright('smali'),
+                                               implements_spec = s_ifaces,
+                                               extends = "extends" if len(self.ifaces) else "",
+                                               ifaces = j_ifaces,
+                                               funcs = funcs,
+                                               tree = self.get_tree(),
+                                               class_name = self.class_name)
+
+def print_tree(ifaces):
+  """
+  Prints a list of iface trees
+  """
+  return " ".join(i.get_tree() for i in ifaces)
+
+# The deduplicated output of subtree_sizes for each size up to
+# MAX_IFACE_DEPTH.
+SUBTREES = [set(tuple(sorted(l)) for l in subtree_sizes(i))
+            for i in range(MAX_IFACE_DEPTH + 1)]
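+# For example, assuming subtree_sizes(3) yields leaf arrangements such as
+# (1, 1, 1), (1, 2) and (3,), SUBTREES[3] keeps one sorted copy of each, so
+# structurally identical interface trees are only generated once.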
+
+def create_test_classes():
+  """
+  Yield all the test classes with the different interface trees
+  """
+  for num in range(1, MAX_IFACE_DEPTH + 1):
+    for iface in create_interface_trees(num):
+      yield TestClass(iface)
+
+def create_interface_trees(num):
+  """
+  Yield all the interface trees up to 'num' depth.
+  """
+  if num == 0:
+    for iftype in InterfaceType:
+      yield TestInterface(tuple(), iftype)
+    return
+  for split in SUBTREES[num]:
+    ifaces = []
+    for sub in split:
+      ifaces.append(list(create_interface_trees(sub)))
+    yield TestInterface(tuple(), InterfaceType.default)
+    for supers in itertools.product(*ifaces):
+      for iftype in InterfaceType:
+        if iftype == InterfaceType.default:
+          # We can just stop at defaults. Other tests already check that a default can
+          # override an abstract, and skipping defaults here cuts down the number of
+          # cases significantly, improving the speed of this test.
+          continue
+        yield TestInterface(supers, iftype)
+
+def create_all_test_files():
+  """
+  Creates all the objects representing the files in this test. They just need to
+  be dumped.
+  """
+  mc = MainClass()
+  classes = {mc}
+  for clazz in create_test_classes():
+    classes.add(clazz)
+    for i in clazz:
+      classes.add(i)
+    mc.add_test(clazz)
+  return mc, classes
+
+def main(argv):
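+  # Expected invocation (inferred from the positional arguments below):
+  #   generate_smali.py <smali-output-dir> <expected-output-file>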
+  smali_dir = Path(argv[1])
+  if not smali_dir.exists() or not smali_dir.is_dir():
+    print("{} is not a valid smali dir".format(smali_dir), file=sys.stderr)
+    sys.exit(1)
+  expected_txt = Path(argv[2])
+  mainclass, all_files = create_all_test_files()
+  with expected_txt.open('w') as out:
+    print(mainclass.get_expected(), file=out)
+  for f in all_files:
+    f.dump(smali_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index bffd0e0..f74a516 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -34,11 +34,11 @@
   139-register-natives/regnative.cc \
   141-class-unload/jni_unload.cc \
   454-get-vreg/get_vreg_jni.cc \
-  455-set-vreg/set_vreg_jni.cc \
   457-regs/regs_jni.cc \
   461-get-reference-vreg/get_reference_vreg_jni.cc \
   466-get-live-vreg/get_live_vreg_jni.cc \
-  497-inlining-and-class-loader/clear_dex_cache.cc
+  497-inlining-and-class-loader/clear_dex_cache.cc \
+  543-env-long-ref/env_long_ref.cc
 
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 9ff620b..0925d36 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -208,6 +208,10 @@
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
 endef  # name-to-var
 
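+# Any run-test listed in ART_TEST_RUN_TEST_SKIP (by its full name, e.g.
+# '457-regs') is marked as known broken for every configuration, so it is
+# skipped entirely.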
+ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
+        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
+
 # Tests that are timing sensitive and flaky on heavily loaded systems.
 TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \
   053-wait-some \
@@ -226,6 +230,7 @@
   960-default-smali \
   961-default-iface-resolution-generated \
   964-default-iface-init-generated \
+  968-default-partial-compile-generated
 
 # Check if we have python3 to run our tests.
 ifeq ($(wildcard /usr/bin/python3),)
@@ -253,8 +258,10 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
@@ -304,12 +311,13 @@
 # Tests that are broken with GC stress.
 # * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 #   hope the second process got into the expected state. The slowness of gcstress makes this bad.
-# * 961-default-iface-resolution-generated is a very long test that often will take more than the
-#   timeout to run when gcstress is enabled. This is because gcstress slows down allocations
-#   significantly which this test does a lot.
+# * 961-default-iface-resolution-generated and 964-default-iface-init-generated are very long
+#   tests that often take more than the timeout to run when gcstress is enabled. This is
+#   because gcstress significantly slows down allocations, which these tests perform in large
+#   numbers.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
   137-cfi \
-  961-default-iface-resolution-generated
+  961-default-iface-resolution-generated \
+  964-default-iface-init-generated
 
 ifneq (,$(filter gcstress,$(GC_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -445,7 +453,6 @@
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
     441-checker-inliner \
     510-checker-try-catch \
-    521-checker-array-set-null \
     536-checker-intrinsic-optimization \
 
 ifeq (mips,$(TARGET_ARCH))
@@ -459,18 +466,6 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
 
-# Known broken tests for the optimizing compiler.
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS := \
-  455-set-vreg \
-
-ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
-
 # Tests that should fail when the optimizing compiler compiles them non-debuggable.
 TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
   454-get-vreg \
@@ -496,16 +491,20 @@
 TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
 
 # Tests that should fail in the read barrier configuration.
+# 055: Exceeds run time limits due to read barrier instrumentation.
 # 137: Read barrier forces interpreter. Cannot run this with the interpreter.
-# 141: Class unloading test is flaky with CC since CC seems to occasionally keep class loaders live.
+# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
+#      handled in the read barrier configuration.
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS := \
-  137-cfi \
-  141-class-unload
+  055-enum-performance                    \
+  137-cfi                                 \
+  537-checker-arraycopy
 
 ifeq ($(ART_USE_READ_BARRIER),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+      $(PREBUILD_TYPES),$(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
+      $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+      $(TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
 endif
 
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
diff --git a/test/dexdump/run-all-tests b/test/dexdump/run-all-tests
index 9cf7ab6..11ab55a 100755
--- a/test/dexdump/run-all-tests
+++ b/test/dexdump/run-all-tests
@@ -40,7 +40,7 @@
 # Set up dexdump binary and flags to test.
 DEXD="${ANDROID_HOST_OUT}/bin/dexdump2"
 DEXDFLAGS1="-dfh"
-DEXDFLAGS2="-l xml"
+DEXDFLAGS2="-e -l xml"
 
 # Set up dexlist binary and flags to test.
 DEXL="${ANDROID_HOST_OUT}/bin/dexlist"
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 18867fd..3efa6ff 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -367,6 +367,7 @@
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
                       --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g") \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
     dex2oat_cmdline="${dex2oat_cmdline} --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
diff --git a/test/run-test b/test/run-test
index 9b0261e..6e13b8a 100755
--- a/test/run-test
+++ b/test/run-test
@@ -665,6 +665,15 @@
 
 export TEST_NAME=`basename ${test_dir}`
 
+# arch_supports_read_barrier ARCH
+# -------------------------------
+# Return whether the Optimizing compiler has read barrier support for ARCH.
+function arch_supports_read_barrier() {
+  # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the
+  # moment.
+  [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+}
+
 # Tests named '<number>-checker-*' will also have their CFGs verified with
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
@@ -678,9 +687,24 @@
   USE_JACK="false"
 
   if [ "$runtime" = "art" -a "$image_suffix" = "-optimizing" ]; then
+    # Optimizing has read barrier support for certain architectures
+    # only. On other architectures, compiling is disabled when read
+    # barriers are enabled, meaning that we do not produce a CFG file
+    # as a side-effect of compilation, thus the Checker assertions
+    # cannot be checked. Disable Checker for those cases.
+    #
+    # TODO: Enable Checker when read barrier support is added to more
+    # architectures (b/12687968).
+    if [ "x$ART_USE_READ_BARRIER" = xtrue ]                    \
+       && (([ "x$host_mode" = "xyes" ]                         \
+            && ! arch_supports_read_barrier "$host_arch_name") \
+           || ([ "x$target_mode" = "xyes" ]                    \
+               && ! arch_supports_read_barrier "$target_arch_name")); then
+      run_checker="no"
     # In no-prebuild mode, the compiler is only invoked if both dex2oat and
     # patchoat are available. Disable Checker otherwise (b/22552692).
-    if [ "$prebuild_mode" = "yes" ] || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
+    elif [ "$prebuild_mode" = "yes" ] \
+         || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
       run_checker="yes"
 
       if [ "$target_mode" = "no" ]; then
@@ -708,18 +732,15 @@
 # To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
 build_file_size_limit=2048
 run_file_size_limit=2048
-if echo "$test_dir" | grep 089; then
-  build_file_size_limit=5120
-  run_file_size_limit=5120
-elif echo "$test_dir" | grep 083; then
+if echo "$test_dir" | grep -Eq "(083|089|964)" > /dev/null; then
   build_file_size_limit=5120
   run_file_size_limit=5120
 fi
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=16384
-  run_file_size_limit=16384
+  build_file_size_limit=24576
+  run_file_size_limit=24576
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/test/utils/python/testgen/mixins.py b/test/utils/python/testgen/mixins.py
index 085e51d..aa8943b 100644
--- a/test/utils/python/testgen/mixins.py
+++ b/test/utils/python/testgen/mixins.py
@@ -79,6 +79,12 @@
   """
   pass
 
+class JavaFileMixin(get_file_extension_mixin(".java")):
+  """
+  A mixin that defines that the file this class belongs to is get_name() + ".java".
+  """
+  pass
+
 class NameComparableMixin(object):
   """
   A mixin that defines the object comparison and related functionality in terms
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index d6f55aa..362ae25 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -13,10 +13,7 @@
    - Recommend how to start looking at a heap dump.
    - Say how to enable allocation sites.
    - Where to submit feedback, questions, and bug reports.
- * Submit perflib fix for getting stack traces, then uncomment that code in
-   AhatSnapshot to use that.
  * Dim 'image' and 'zygote' heap sizes slightly? Why do we even show these?
- * Filter out RootObjs in mSnapshot.getGCRoots, not RootsHandler.
  * Let user re-sort sites objects info by clicking column headers.
  * Let user re-sort "Objects" list.
  * Show site context and heap and class filter in "Objects" view?
@@ -25,23 +22,15 @@
  * Show root types.
  * Heaped Table
    - Make sortable by clicking on headers.
-   - Use consistent order for heap columns.
-      Sometimes I see "app" first, sometimes last (from one heap dump to
-      another) How about, always sort by name?
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
  * Should help be part of an AhatHandler, that automatically gets the menu and
    stylesheet link rather than duplicating that?
  * Show version number with --version.
  * Show somewhere where to send bugs.
- * /objects query takes a long time to load without parameters.
  * Include a link to /objects in the overview and menu?
  * Turn on LOCAL_JAVACFLAGS := -Xlint:unchecked -Werror
  * Use hex for object ids in URLs?
- * In general, all tables and descriptions should show a limited amount to
-   start, and only show more when requested by the user.
- * Don't have handlers inherit from HttpHandler
-   - because they should be independent from http.
 
  * [low priority] by site allocations won't line up if the stack has been
    truncated. Is there any way to manually line them up in that case?
@@ -60,8 +49,6 @@
    objects normally sorted by 'app' heap by default.
  * Visit /objects without parameters and verify it doesn't throw an exception.
  * Visit /objects with an invalid site, verify it doesn't throw an exception.
- * That we can view an array with 3 million elements in a reasonably short
-   amount of time (not more than 1 second?)
  * That we can view the list of all objects in a reasonably short amount of
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
@@ -72,8 +59,6 @@
 
 Reported Issues:
  * Request to be able to sort tables by size.
- * Hangs on showing large arrays, where hat does not hang.
-   - Solution is probably to not show all the array elements by default.
 
 Perflib Requests:
  * Class objects should have java.lang.Class as their class object, not null.
diff --git a/tools/ahat/src/AhatHandler.java b/tools/ahat/src/AhatHandler.java
index 2da02f8..d4b4d1b 100644
--- a/tools/ahat/src/AhatHandler.java
+++ b/tools/ahat/src/AhatHandler.java
@@ -16,51 +16,17 @@
 
 package com.android.ahat;
 
-import com.sun.net.httpserver.HttpExchange;
-import com.sun.net.httpserver.HttpHandler;
 import java.io.IOException;
-import java.io.PrintStream;
 
 /**
  * AhatHandler.
  *
- * Common base class of all the ahat HttpHandlers.
+ * Interface for an ahat page handler.
  */
-abstract class AhatHandler implements HttpHandler {
+interface AhatHandler {
 
-  protected AhatSnapshot mSnapshot;
-
-  public AhatHandler(AhatSnapshot snapshot) {
-    mSnapshot = snapshot;
-  }
-
-  public abstract void handle(Doc doc, Query query) throws IOException;
-
-  @Override
-  public void handle(HttpExchange exchange) throws IOException {
-    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
-    exchange.sendResponseHeaders(200, 0);
-    PrintStream ps = new PrintStream(exchange.getResponseBody());
-    try {
-      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
-      DocString menu = new DocString();
-      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("roots"), DocString.text("roots"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("help"), DocString.text("help"));
-      doc.menu(menu);
-      handle(doc, new Query(exchange.getRequestURI()));
-      doc.close();
-    } catch (RuntimeException e) {
-      // Print runtime exceptions to standard error for debugging purposes,
-      // because otherwise they are swallowed and not reported.
-      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
-      e.printStackTrace();
-      throw e;
-    }
-    ps.close();
-  }
+  /**
+   * Handle the given query, rendering the page to the given document.
+   */
+  void handle(Doc doc, Query query) throws IOException;
 }
diff --git a/tools/ahat/src/AhatHttpHandler.java b/tools/ahat/src/AhatHttpHandler.java
new file mode 100644
index 0000000..178747c
--- /dev/null
+++ b/tools/ahat/src/AhatHttpHandler.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import java.io.IOException;
+import java.io.PrintStream;
+
+/**
+ * AhatHttpHandler.
+ *
+ * HttpHandler for AhatHandlers.
+ */
+class AhatHttpHandler implements HttpHandler {
+
+  private AhatHandler mAhatHandler;
+
+  public AhatHttpHandler(AhatHandler handler) {
+    mAhatHandler = handler;
+  }
+
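+  // Transport-level concerns (response headers, the status code, the common
+  // page menu and error reporting) are handled here, keeping AhatHandler
+  // implementations independent of the HTTP layer.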
+  @Override
+  public void handle(HttpExchange exchange) throws IOException {
+    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
+    exchange.sendResponseHeaders(200, 0);
+    PrintStream ps = new PrintStream(exchange.getResponseBody());
+    try {
+      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
+      DocString menu = new DocString();
+      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("rooted"), DocString.text("rooted"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("help"), DocString.text("help"));
+      doc.menu(menu);
+      mAhatHandler.handle(doc, new Query(exchange.getRequestURI()));
+      doc.close();
+    } catch (RuntimeException e) {
+      // Print runtime exceptions to standard error for debugging purposes,
+      // because otherwise they are swallowed and not reported.
+      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
+      e.printStackTrace();
+      throw e;
+    }
+    ps.close();
+  }
+}
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 43658f3..0bf064e 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -19,7 +19,6 @@
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.RootObj;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
@@ -30,7 +29,6 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -47,6 +45,9 @@
   // Map from Instance to the list of Instances it immediately dominates.
   private Map<Instance, List<Instance>> mDominated;
 
+  // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
+  private List<Instance> mRooted;
+
   private Site mRootSite;
   private Map<Heap, Long> mHeapSizes;
 
@@ -70,6 +71,7 @@
     mDominated = new HashMap<Instance, List<Instance>>();
     mRootSite = new Site("ROOT");
     mHeapSizes = new HashMap<Heap, Long>();
+    mRooted = new ArrayList<Instance>();
 
     ClassObj javaLangClass = mSnapshot.findClass("java.lang.Class");
     for (Heap heap : mHeaps) {
@@ -79,6 +81,10 @@
         if (dominator != null) {
           total += inst.getSize();
 
+          if (dominator == Snapshot.SENTINEL_ROOT) {
+            mRooted.add(inst);
+          }
+
           // Properly label the class of a class object.
           if (inst instanceof ClassObj && javaLangClass != null && inst.getClassObj() == null) {
               inst.setClassId(javaLangClass.getId());
@@ -126,8 +132,12 @@
     return mSnapshot.getHeap(name);
   }
 
-  public Collection<RootObj> getGCRoots() {
-    return mSnapshot.getGCRoots();
+  /**
+   * Returns a collection of instances whose immediate dominator is the
+   * SENTINEL_ROOT.
+   */
+  public List<Instance> getRooted() {
+    return mRooted;
   }
 
   public List<Heap> getHeaps() {
diff --git a/tools/ahat/src/Doc.java b/tools/ahat/src/Doc.java
index 7fa70de..5a70c4c 100644
--- a/tools/ahat/src/Doc.java
+++ b/tools/ahat/src/Doc.java
@@ -25,27 +25,27 @@
   /**
    * Output the title of the page.
    */
-  public void title(String format, Object... args);
+  void title(String format, Object... args);
 
   /**
    * Print a line of text for a page menu.
    */
-  public void menu(DocString string);
+  void menu(DocString string);
 
   /**
    * Start a new section with the given title.
    */
-  public void section(String title);
+  void section(String title);
 
   /**
    * Print a line of text in a normal font.
    */
-  public void println(DocString string);
+  void println(DocString string);
 
   /**
    * Print a line of text in a large font that is easy to see and click on.
    */
-  public void big(DocString string);
+  void big(DocString string);
 
   /**
    * Start a table with the given columns.
@@ -55,7 +55,7 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(Column... columns);
+  void table(Column... columns);
 
   /**
    * Start a table with the following heading structure:
@@ -68,14 +68,14 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(DocString description, List<Column> subcols, List<Column> cols);
+  void table(DocString description, List<Column> subcols, List<Column> cols);
 
   /**
    * Add a row to the currently active table.
    * The number of values must match the number of columns provided for the
    * currently active table.
    */
-  public void row(DocString... values);
+  void row(DocString... values);
 
   /**
    * Start a new description list.
@@ -83,15 +83,15 @@
    * This should be followed by calls to description() and finally a call to
    * end().
    */
-  public void descriptions();
+  void descriptions();
 
   /**
    * Add a description to the currently active description list.
    */
-  public void description(DocString key, DocString value);
+  void description(DocString key, DocString value);
 
   /**
    * End the currently active table or description list.
    */
-  public void end();
+  void end();
 }
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 123d8be..34a5665 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -21,71 +21,35 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * Class for rendering a list of instances dominated by a single instance in a
  * pretty way.
  */
 class DominatedList {
-  private static final int kIncrAmount = 100;
-  private static final int kDefaultShown = 100;
-
   /**
    * Render a table to the given HtmlWriter showing a pretty list of
    * instances.
    *
-   * Rather than show all of the instances (which may be very many), we use
-   * the query parameter "dominated" to specify a limited number of
-   * instances to show. The 'uri' parameter should be the current page URI, so
-   * that we can add links to "show more" and "show less" objects that go to
-   * the same page with only the number of objects adjusted.
+   * @param snapshot  the snapshot where the instances reside
+   * @param doc       the document to render the dominated list to
+   * @param query     the current page query
+   * @param id        a unique identifier to use for the dominated list in the current page
+   * @param instances the collection of instances to generate a list for
    */
-  public static void render(final AhatSnapshot snapshot, Doc doc,
-      Collection<Instance> instances, Query query) {
+  public static void render(final AhatSnapshot snapshot,
+      Doc doc, Query query, String id, Collection<Instance> instances) {
     List<Instance> insts = new ArrayList<Instance>(instances);
     Collections.sort(insts, Sort.defaultInstanceCompare(snapshot));
-
-    int numInstancesToShow = getNumInstancesToShow(query, insts.size());
-    List<Instance> shown = new ArrayList<Instance>(insts.subList(0, numInstancesToShow));
-    List<Instance> hidden = insts.subList(numInstancesToShow, insts.size());
-
-    // Add 'null' as a marker for "all the rest of the objects".
-    if (!hidden.isEmpty()) {
-      shown.add(null);
-    }
-    HeapTable.render(doc, new TableConfig(snapshot, hidden), snapshot, shown);
-
-    if (insts.size() > kDefaultShown) {
-      printMenu(doc, query, numInstancesToShow, insts.size());
-    }
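+    // HeapTable pages the sorted list through a SubsetSelector keyed by 'id'
+    // and renders a single summary row for entries beyond the display limit.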
+    HeapTable.render(doc, query, id, new TableConfig(snapshot), snapshot, insts);
   }
 
   private static class TableConfig implements HeapTable.TableConfig<Instance> {
     AhatSnapshot mSnapshot;
 
-    // Map from heap name to the total size of the instances not shown in the
-    // table.
-    Map<Heap, Long> mHiddenSizes;
-
-    public TableConfig(AhatSnapshot snapshot, List<Instance> hidden) {
+    public TableConfig(AhatSnapshot snapshot) {
       mSnapshot = snapshot;
-      mHiddenSizes = new HashMap<Heap, Long>();
-      for (Heap heap : snapshot.getHeaps()) {
-        mHiddenSizes.put(heap, 0L);
-      }
-
-      if (!hidden.isEmpty()) {
-        for (Instance inst : hidden) {
-          for (Heap heap : snapshot.getHeaps()) {
-            int index = snapshot.getHeapIndex(heap);
-            long size = inst.getRetainedSize(index);
-            mHiddenSizes.put(heap, mHiddenSizes.get(heap) + size);
-          }
-        }
-      }
     }
 
     @Override
@@ -95,9 +59,6 @@
 
     @Override
     public long getSize(Instance element, Heap heap) {
-      if (element == null) {
-        return mHiddenSizes.get(heap);
-      }
       int index = mSnapshot.getHeapIndex(heap);
       return element.getRetainedSize(index);
     }
@@ -110,56 +71,10 @@
         }
 
         public DocString render(Instance element) {
-          if (element == null) {
-            return DocString.text("...");
-          } else {
-            return Value.render(element);
-          }
+          return Value.render(element);
         }
       };
       return Collections.singletonList(value);
     }
   }
-
-  // Figure out how many objects to show based on the query parameter.
-  // The resulting value is guaranteed to be at least zero, and no greater
-  // than the number of total objects.
-  private static int getNumInstancesToShow(Query query, int totalNumInstances) {
-    String value = query.get("dominated", null);
-    try {
-      int count = Math.min(totalNumInstances, Integer.parseInt(value));
-      return Math.max(0, count);
-    } catch (NumberFormatException e) {
-      // We can't parse the value as a number. Ignore it.
-    }
-    return Math.min(kDefaultShown, totalNumInstances);
-  }
-
-  // Print a menu line after the table to control how many objects are shown.
-  // It has the form:
-  //  (showing X of Y objects - show none - show less - show more - show all)
-  private static void printMenu(Doc doc, Query query, int shown, int all) {
-    DocString menu = new DocString();
-    menu.appendFormat("(%d of %d objects shown - ", shown, all);
-    if (shown > 0) {
-      int less = Math.max(0, shown - kIncrAmount);
-      menu.appendLink(query.with("dominated", 0), DocString.text("show none"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", less), DocString.text("show less"));
-      menu.append(" - ");
-    } else {
-      menu.append("show none - show less - ");
-    }
-    if (shown < all) {
-      int more = Math.min(shown + kIncrAmount, all);
-      menu.appendLink(query.with("dominated", more), DocString.text("show more"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", all), DocString.text("show all"));
-      menu.append(")");
-    } else {
-      menu.append("show more - show all)");
-    }
-    doc.println(menu);
-  }
 }
-
diff --git a/tools/ahat/src/HeapTable.java b/tools/ahat/src/HeapTable.java
index 37d5816..ed11d17 100644
--- a/tools/ahat/src/HeapTable.java
+++ b/tools/ahat/src/HeapTable.java
@@ -18,7 +18,9 @@
 
 import com.android.tools.perflib.heap.Heap;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Class for rendering a table that includes sizes of some kind for each heap.
@@ -27,22 +29,27 @@
   /**
    * Configuration for a value column of a heap table.
    */
-  public static interface ValueConfig<T> {
-    public String getDescription();
-    public DocString render(T element);
+  public interface ValueConfig<T> {
+    String getDescription();
+    DocString render(T element);
   }
 
   /**
    * Configuration for the HeapTable.
    */
-  public static interface TableConfig<T> {
-    public String getHeapsDescription();
-    public long getSize(T element, Heap heap);
-    public List<ValueConfig<T>> getValueConfigs();
+  public interface TableConfig<T> {
+    String getHeapsDescription();
+    long getSize(T element, Heap heap);
+    List<ValueConfig<T>> getValueConfigs();
   }
 
-  public static <T> void render(Doc doc, TableConfig<T> config,
-      AhatSnapshot snapshot, List<T> elements) {
+  /**
+   * Render the table to the given document.
+   * @param query - The page query.
+   * @param id - A unique identifier for the table on the page.
+   */
+  public static <T> void render(Doc doc, Query query, String id,
+      TableConfig<T> config, AhatSnapshot snapshot, List<T> elements) {
     // Only show the heaps that have non-zero entries.
     List<Heap> heaps = new ArrayList<Heap>();
     for (Heap heap : snapshot.getHeaps()) {
@@ -68,9 +75,10 @@
     }
     doc.table(DocString.text(config.getHeapsDescription()), subcols, cols);
 
-    // Print the entries.
+    // Print the entries up to the selected limit.
+    SubsetSelector<T> selector = new SubsetSelector(query, id, elements);
     ArrayList<DocString> vals = new ArrayList<DocString>();
-    for (T elem : elements) {
+    for (T elem : selector.selected()) {
       vals.clear();
       long total = 0;
       for (Heap heap : heaps) {
@@ -87,7 +95,39 @@
       }
       doc.row(vals.toArray(new DocString[0]));
     }
+
+    // Print a summary of the remaining entries if there are any.
+    List<T> remaining = selector.remaining();
+    if (!remaining.isEmpty()) {
+      Map<Heap, Long> summary = new HashMap<Heap, Long>();
+      for (Heap heap : heaps) {
+        summary.put(heap, 0L);
+      }
+
+      for (T elem : remaining) {
+        for (Heap heap : heaps) {
+          summary.put(heap, summary.get(heap) + config.getSize(elem, heap));
+        }
+      }
+
+      vals.clear();
+      long total = 0;
+      for (Heap heap : heaps) {
+        long size = summary.get(heap);
+        total += size;
+        vals.add(DocString.format("%,14d", size));
+      }
+      if (showTotal) {
+        vals.add(DocString.format("%,14d", total));
+      }
+
+      for (ValueConfig<T> value : values) {
+        vals.add(DocString.text("..."));
+      }
+      doc.row(vals.toArray(new DocString[0]));
+    }
     doc.end();
+    selector.render(doc);
   }
 
   // Returns true if the given heap has a non-zero size entry.
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index 1563aa0..ebd49d7 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -73,11 +73,11 @@
     InetAddress loopback = InetAddress.getLoopbackAddress();
     InetSocketAddress addr = new InetSocketAddress(loopback, port);
     HttpServer server = HttpServer.create(addr, 0);
-    server.createContext("/", new OverviewHandler(ahat, hprof));
-    server.createContext("/roots", new RootsHandler(ahat));
-    server.createContext("/object", new ObjectHandler(ahat));
-    server.createContext("/objects", new ObjectsHandler(ahat));
-    server.createContext("/site", new SiteHandler(ahat));
+    server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
+    server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
+    server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
+    server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
+    server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
     server.createContext("/help", new StaticHandler("help.html", "text/html"));
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 5e321e2..1305070 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -25,13 +25,26 @@
 import com.android.tools.perflib.heap.RootObj;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-class ObjectHandler extends AhatHandler {
+class ObjectHandler implements AhatHandler {
+
+  private static final String ARRAY_ELEMENTS_ID = "elements";
+  private static final String DOMINATOR_PATH_ID = "dompath";
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String DOMINATED_OBJECTS_ID = "dominated";
+  private static final String INSTANCE_FIELDS_ID = "ifields";
+  private static final String STATIC_FIELDS_ID = "sfields";
+  private static final String HARD_REFS_ID = "refs";
+  private static final String SOFT_REFS_ID = "srefs";
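+  // Each ID keys an independent SubsetSelector, so paging one table on the
+  // page (presumably via a query parameter of the same name) leaves the
+  // other tables untouched.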
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -46,8 +59,8 @@
     doc.title("Object %08x", inst.getUniqueId());
     doc.big(Value.render(inst));
 
-    printAllocationSite(doc, inst);
-    printDominatorPath(doc, inst);
+    printAllocationSite(doc, query, inst);
+    printDominatorPath(doc, query, inst);
 
     doc.section("Object Info");
     ClassObj cls = inst.getClassObj();
@@ -62,39 +75,46 @@
 
     printBitmap(doc, inst);
     if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, (ClassInstance)inst);
+      printClassInstanceFields(doc, query, (ClassInstance)inst);
     } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, (ArrayInstance)inst);
+      printArrayElements(doc, query, (ArrayInstance)inst);
     } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, (ClassObj)inst);
+      printClassInfo(doc, query, (ClassObj)inst);
     }
-    printReferences(doc, inst);
+    printReferences(doc, query, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(Doc doc, ClassInstance inst) {
+  private static void printClassInstanceFields(Doc doc, Query query, ClassInstance inst) {
     doc.section("Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (ClassInstance.FieldValue field : inst.getValues()) {
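+    // SubsetSelector presumably caps how many rows are listed, driven by a
+    // query parameter keyed by INSTANCE_FIELDS_ID; selector.render(doc) below
+    // emits the links for showing more or fewer entries.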
+    SubsetSelector<ClassInstance.FieldValue> selector
+      = new SubsetSelector(query, INSTANCE_FIELDS_ID, inst.getValues());
+    for (ClassInstance.FieldValue field : selector.selected()) {
       doc.row(
           DocString.text(field.getField().getType().toString()),
           DocString.text(field.getField().getName()),
           Value.render(field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printArrayElements(Doc doc, ArrayInstance array) {
+  private static void printArrayElements(Doc doc, Query query, ArrayInstance array) {
     doc.section("Array Elements");
     doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
-    Object[] elements = array.getValues();
-    for (int i = 0; i < elements.length; i++) {
-      doc.row(DocString.format("%d", i), Value.render(elements[i]));
+    List<Object> elements = Arrays.asList(array.getValues());
+    SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
+    int i = 0;
+    for (Object elem : selector.selected()) {
+      doc.row(DocString.format("%d", i), Value.render(elem));
+      i++;
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printClassInfo(Doc doc, ClassObj clsobj) {
+  private static void printClassInfo(Doc doc, Query query, ClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
     doc.description(DocString.text("Super Class"), Value.render(clsobj.getSuperClassObj()));
@@ -103,41 +123,52 @@
 
     doc.section("Static Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (Map.Entry<Field, Object> field : clsobj.getStaticFieldValues().entrySet()) {
+    List<Map.Entry<Field, Object>> fields
+      = new ArrayList<Map.Entry<Field, Object>>(clsobj.getStaticFieldValues().entrySet());
+    SubsetSelector<Map.Entry<Field, Object>> selector
+      = new SubsetSelector(query, STATIC_FIELDS_ID, fields);
+    for (Map.Entry<Field, Object> field : selector.selected()) {
       doc.row(
           DocString.text(field.getKey().getType().toString()),
           DocString.text(field.getKey().getName()),
           Value.render(field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printReferences(Doc doc, Instance inst) {
+  private static void printReferences(Doc doc, Query query, Instance inst) {
     doc.section("Objects with References to this Object");
     if (inst.getHardReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getHardReferences()) {
+      List<Instance> references = inst.getHardReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
         doc.row(Value.render(ref));
       }
       doc.end();
+      selector.render(doc);
     }
 
     if (inst.getSoftReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getSoftReferences()) {
-        doc.row(Value.render(inst));
+      List<Instance> references = inst.getSoftReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
+        doc.row(Value.render(ref));
       }
       doc.end();
+      selector.render(doc);
     }
   }
 
-  private void printAllocationSite(Doc doc, Instance inst) {
+  private void printAllocationSite(Doc doc, Query query, Instance inst) {
     doc.section("Allocation Site");
     Site site = mSnapshot.getSiteForInstance(inst);
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
   }
 
   // Draw the bitmap corresponding to this instance if there is one.
@@ -150,7 +181,7 @@
     }
   }
 
-  private void printDominatorPath(Doc doc, Instance inst) {
+  private void printDominatorPath(Doc doc, Query query, Instance inst) {
     doc.section("Dominator Path from Root");
     List<Instance> path = new ArrayList<Instance>();
     for (Instance parent = inst;
@@ -184,7 +215,7 @@
 
           public DocString render(Instance element) {
             if (element == null) {
-              return DocString.link(DocString.uri("roots"), DocString.text("ROOT"));
+              return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
             } else {
               return DocString.text("→ ").append(Value.render(element));
             }
@@ -193,14 +224,14 @@
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, mSnapshot, path);
+    HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
   }
 
   public void printDominatedObjects(Doc doc, Query query, Instance inst) {
     doc.section("Immediately Dominated Objects");
     List<Instance> instances = mSnapshot.getDominated(inst);
     if (instances != null) {
-      DominatedList.render(mSnapshot, doc, instances, query);
+      DominatedList.render(mSnapshot, doc, query, DOMINATED_OBJECTS_ID, instances);
     } else {
       doc.println(DocString.text("(none)"));
     }
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 4e9c42e..8ad3f48 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -22,9 +22,13 @@
 import java.util.Collections;
 import java.util.List;
 
-class ObjectsHandler extends AhatHandler {
+class ObjectsHandler implements AhatHandler {
+  private static final String OBJECTS_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -51,13 +55,15 @@
         new Column("Size", Column.Align.RIGHT),
         new Column("Heap"),
         new Column("Object"));
-    for (Instance inst : insts) {
+    SubsetSelector<Instance> selector = new SubsetSelector<Instance>(query, OBJECTS_ID, insts);
+    for (Instance inst : selector.selected()) {
       doc.row(
           DocString.format("%,d", inst.getSize()),
           DocString.text(inst.getHeap().getName()),
           Value.render(inst));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index f49c009..0fe4fba 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -22,11 +22,15 @@
 import java.util.Collections;
 import java.util.List;
 
-class OverviewHandler extends AhatHandler {
+class OverviewHandler implements AhatHandler {
+
+  private static final String OVERVIEW_ID = "overview";
+
+  private AhatSnapshot mSnapshot;
   private File mHprof;
 
   public OverviewHandler(AhatSnapshot snapshot, File hprof) {
-    super(snapshot);
+    mSnapshot = snapshot;
     mHprof = hprof;
   }
 
@@ -43,10 +47,10 @@
     doc.end();
 
     doc.section("Heap Sizes");
-    printHeapSizes(doc);
+    printHeapSizes(doc, query);
 
     DocString menu = new DocString();
-    menu.appendLink(DocString.uri("roots"), DocString.text("Roots"));
+    menu.appendLink(DocString.uri("rooted"), DocString.text("Rooted"));
     menu.append(" - ");
     menu.appendLink(DocString.uri("site"), DocString.text("Allocations"));
     menu.append(" - ");
@@ -54,7 +58,7 @@
     doc.big(menu);
   }
 
-  private void printHeapSizes(Doc doc) {
+  private void printHeapSizes(Doc doc, Query query) {
     List<Object> dummy = Collections.singletonList(null);
 
     HeapTable.TableConfig<Object> table = new HeapTable.TableConfig<Object>() {
@@ -70,7 +74,7 @@
         return Collections.emptyList();
       }
     };
-    HeapTable.render(doc, table, mSnapshot, dummy);
+    HeapTable.render(doc, query, OVERVIEW_ID, table, mSnapshot, dummy);
   }
 }
 
diff --git a/tools/ahat/src/RootedHandler.java b/tools/ahat/src/RootedHandler.java
new file mode 100644
index 0000000..ec3272f
--- /dev/null
+++ b/tools/ahat/src/RootedHandler.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.io.IOException;
+
+class RootedHandler implements AhatHandler {
+
+  private static final String ROOTED_ID = "rooted";
+
+  private AhatSnapshot mSnapshot;
+
+  public RootedHandler(AhatSnapshot snapshot) {
+    mSnapshot = snapshot;
+  }
+
+  @Override
+  public void handle(Doc doc, Query query) throws IOException {
+    doc.title("Rooted");
+    DominatedList.render(mSnapshot, doc, query, ROOTED_ID, mSnapshot.getRooted());
+  }
+}
diff --git a/tools/ahat/src/RootsHandler.java b/tools/ahat/src/RootsHandler.java
deleted file mode 100644
index 185b9bf..0000000
--- a/tools/ahat/src/RootsHandler.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.ahat;
-
-import com.android.tools.perflib.heap.Instance;
-import com.android.tools.perflib.heap.RootObj;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-class RootsHandler extends AhatHandler {
-  public RootsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
-  }
-
-  @Override
-  public void handle(Doc doc, Query query) throws IOException {
-    doc.title("Roots");
-
-    Set<Instance> rootset = new HashSet<Instance>();
-    for (RootObj root : mSnapshot.getGCRoots()) {
-      Instance inst = root.getReferredInstance();
-      if (inst != null) {
-        rootset.add(inst);
-      }
-    }
-
-    List<Instance> roots = new ArrayList<Instance>();
-    for (Instance inst : rootset) {
-      roots.add(inst);
-    }
-    DominatedList.render(mSnapshot, doc, roots, query);
-  }
-}
-
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 0a9381e..0425a5a 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -22,9 +22,15 @@
 import java.util.Comparator;
 import java.util.List;
 
-class SiteHandler extends AhatHandler {
+class SiteHandler implements AhatHandler {
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String SITES_CALLED_ID = "called";
+  private static final String OBJECTS_ALLOCATED_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public SiteHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -35,7 +41,7 @@
 
     doc.title("Site %s", site.getName());
     doc.section("Allocation Site");
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
 
     doc.section("Sites Called from Here");
     List<Site> children = site.getChildren();
@@ -69,7 +75,7 @@
           return Collections.singletonList(value);
         }
       };
-      HeapTable.render(doc, table, mSnapshot, children);
+      HeapTable.render(doc, query, SITES_CALLED_ID, table, mSnapshot, children);
     }
 
     doc.section("Objects Allocated");
@@ -84,7 +90,9 @@
         new Sort.ObjectsInfoBySize(),
         new Sort.ObjectsInfoByClassName());
     Collections.sort(infos, compare);
-    for (Site.ObjectsInfo info : infos) {
+    SubsetSelector<Site.ObjectsInfo> selector
+      = new SubsetSelector<Site.ObjectsInfo>(query, OBJECTS_ALLOCATED_ID, infos);
+    for (Site.ObjectsInfo info : selector.selected()) {
       String className = AhatSnapshot.getClassName(info.classObj);
       doc.row(
           DocString.format("%,14d", info.numBytes),
@@ -96,6 +104,7 @@
           Value.render(info.classObj));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/SitePrinter.java b/tools/ahat/src/SitePrinter.java
index be87032..2c06b47 100644
--- a/tools/ahat/src/SitePrinter.java
+++ b/tools/ahat/src/SitePrinter.java
@@ -22,7 +22,7 @@
 import java.util.List;
 
 class SitePrinter {
-  public static void printSite(Doc doc, AhatSnapshot snapshot, Site site) {
+  public static void printSite(AhatSnapshot snapshot, Doc doc, Query query, String id, Site site) {
     List<Site> path = new ArrayList<Site>();
     for (Site parent = site; parent != null; parent = parent.getParent()) {
       path.add(parent);
@@ -60,6 +60,6 @@
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, snapshot, path);
+    HeapTable.render(doc, query, id, table, snapshot, path);
   }
 }
diff --git a/tools/ahat/src/SubsetSelector.java b/tools/ahat/src/SubsetSelector.java
new file mode 100644
index 0000000..79399c1
--- /dev/null
+++ b/tools/ahat/src/SubsetSelector.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.util.List;
+
+/**
+ * A SubsetSelector is a widget that can be added to a page to let the
+ * user select a limited number of elements to show.
+ * It limits the number of elements shown on a page by default, requiring
+ * the user to explicitly request more, so users who don't need the full
+ * list don't have to wait for everything to render.
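+ *
+ * <p>A sketch of the intended usage, mirroring ObjectsHandler (the
+ * "objects" query parameter key and the insts list are illustrative):
+ * <pre>
+ *   SubsetSelector&lt;Instance&gt; selector =
+ *       new SubsetSelector&lt;Instance&gt;(query, "objects", insts);
+ *   for (Instance inst : selector.selected()) {
+ *     doc.row(Value.render(inst));    // render only the selected subset
+ *   }
+ *   doc.end();
+ *   selector.render(doc);             // emit the "show more"/"show all" menu
+ * </pre>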
+ */
+class SubsetSelector<T> {
+  private static final int kIncrAmount = 1000;
+  private static final int kDefaultShown = 1000;
+
+  private Query mQuery;
+  private String mId;
+  private int mLimit;
+  private List<T> mElements;
+
+  /**
+   * @param query - the query for the current page. This is required so the
+   * SubsetSelector can add links to the same page with a modified limit
+   * selection.
+   * @param id - the name of the query parameter key that should hold
+   * the limit selector's selected value.
+   * @param elements - the elements to select from. The collection of elements
+   * should not be modified during the lifetime of the SubsetSelector object.
+   */
+  public SubsetSelector(Query query, String id, List<T> elements) {
+    mQuery = query;
+    mId = id;
+    mLimit = getSelectedLimit(query, id, elements.size());
+    mElements = elements;
+  }
+
+  // Return the list of elements included in the selected subset.
+  public List<T> selected() {
+    return mElements.subList(0, mLimit);
+  }
+
+  // Return the list of remaining elements not included in the selected subset.
+  public List<T> remaining() {
+    return mElements.subList(mLimit, mElements.size());
+  }
+
+  /**
+   * Returns the currently selected limit.
+   * @param query the current page query
+   * @param id the query parameter key that holds the selected limit
+   * @param size the total number of elements to select from
+   * @return the number of selected elements
+   */
+  private static int getSelectedLimit(Query query, String id, int size) {
+    String value = query.get(id, null);
+    try {
+      int ivalue = Math.min(size, Integer.parseInt(value));
+      return Math.max(0, ivalue);
+    } catch (NumberFormatException e) {
+      // The value is absent (null) or not a valid number; fall back to the default.
+    }
+    return Math.min(kDefaultShown, size);
+  }
+
+  // Render the limit selector to the given doc.
+  // It has the form:
+  //  (X of Y elements shown - show none - show less - show more - show all)
+  public void render(Doc doc) {
+    int all = mElements.size();
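+    // The menu is only needed when there are more elements than the default limit shows.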
+    if (all > kDefaultShown) {
+      DocString menu = new DocString();
+      menu.appendFormat("(%d of %d elements shown - ", mLimit, all);
+      if (mLimit > 0) {
+        int less = Math.max(0, mLimit - kIncrAmount);
+        menu.appendLink(mQuery.with(mId, 0), DocString.text("show none"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, less), DocString.text("show less"));
+        menu.append(" - ");
+      } else {
+        menu.append("show none - show less - ");
+      }
+      if (mLimit < all) {
+        int more = Math.min(mLimit + kIncrAmount, all);
+        menu.appendLink(mQuery.with(mId, more), DocString.text("show more"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, all), DocString.text("show all"));
+        menu.append(")");
+      } else {
+        menu.append("show more - show all)");
+      }
+      doc.println(menu);
+    }
+  }
+}
diff --git a/tools/ahat/src/help.html b/tools/ahat/src/help.html
index b7ae2ce..92ec37d 100644
--- a/tools/ahat/src/help.html
+++ b/tools/ahat/src/help.html
@@ -20,7 +20,7 @@
 
 <div class="menu">
   <a href="/">overview</a> -
-  <a href="roots">roots</a> -
+  <a href="rooted">rooted</a> -
   <a href="sites">allocations</a> -
   <a href="help">help</a>
 </div>
@@ -29,7 +29,7 @@
 <h2>Information shown by ahat:</h2>
 <ul>
   <li><a href="/">The total bytes retained by heap.</a></li>
-  <li><a href="/roots">A list of root objects and their retained sizes for each heap.</a></li>
+  <li><a href="/rooted">A list of rooted objects and their retained sizes for each heap.</a></li>
   <li>Information about each allocated object:
     <ul>
       <li>The allocation site (stack trace) of the object (if available).</li>
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 7b8774a..90cd7af 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -39,6 +39,15 @@
     public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
     public PhantomReference aPhantomReference = new PhantomReference(anObject, referenceQueue);
     public WeakReference aWeakReference = new WeakReference(anObject, referenceQueue);
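+    // A large array to give PerformanceTest a big object whose view should still render quickly.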
+    public byte[] bigArray;
+
+    DumpedStuff() {
+      int N = 1000000;
+      bigArray = new byte[N];
+      for (int i = 0; i < N; i++) {
+        bigArray[i] = (byte)((i*i) & 0xFF);
+      }
+    }
   }
 
   public static void main(String[] args) throws IOException {
diff --git a/tools/ahat/test/PerformanceTest.java b/tools/ahat/test/PerformanceTest.java
new file mode 100644
index 0000000..6e46800
--- /dev/null
+++ b/tools/ahat/test/PerformanceTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.Instance;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+public class PerformanceTest {
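+  // An OutputStream that discards everything written to it, so the test
+  // measures rendering time without the cost of actual output.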
+  private static class NullOutputStream extends OutputStream {
+    public void write(int b) throws IOException {
+    }
+  }
+
+  @Test
+  public void bigArray() throws IOException {
+    // It should not take more than 1 second to load the default object view
+    // for any object, including big arrays.
+    TestDump dump = TestDump.getTestDump();
+
+    Instance bigArray = (Instance)dump.getDumpedThing("bigArray");
+    assertNotNull(bigArray);
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    AhatHandler handler = new ObjectHandler(snapshot);
+
+    PrintStream ps = new PrintStream(new NullOutputStream());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("bigArray test"), DocString.uri("style.css"));
+    String uri = "http://localhost:7100/object?id=" + bigArray.getId();
+    Query query = new Query(DocString.uri(uri));
+
+    long start = System.currentTimeMillis();
+    handler.handle(doc, query);
+    long time = System.currentTimeMillis() - start;
+    assertTrue("bigArray took too long: " + time + "ms", time < 1000);
+  }
+}
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index bab7121..e8894e2 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -23,8 +23,9 @@
     if (args.length == 0) {
       args = new String[]{
         "com.android.ahat.InstanceUtilsTest",
+        "com.android.ahat.PerformanceTest",
         "com.android.ahat.QueryTest",
-        "com.android.ahat.SortTest"
+        "com.android.ahat.SortTest",
       };
     }
     JUnitCore.main(args);
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 631e0a0..02787fb 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar vogar.jar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
+common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 9a8b462..a5476f7 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -164,5 +164,11 @@
   names: ["libcore.io.OsTest#test_byteBufferPositions_sendto_recvfrom_af_inet6",
           "libcore.io.OsTest#test_sendtoSocketAddress_af_inet6"],
   bug: 25178637
+},
+{
+  description: "Non-deterministic test because of a dependency on weak ref collection.",
+  result: EXEC_FAILED,
+  names: ["org.apache.harmony.tests.java.util.WeakHashMapTest#test_keySet"],
+  bug: 25437292
 }
 ]
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index de27a6f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -28,6 +28,18 @@
   exit 1
 fi
 
+if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
+  # For the moment, skip JDWP tests when read barriers are enabled, as
+  # they sometimes exhibit a deadlock issue with the concurrent
+  # copying collector in the read barrier configuration, between the
+  # HeapTaskDaemon and the JDWP thread (b/25800335).
+  #
+  # TODO: Re-enable the JDWP tests when this deadlock issue is fixed.
+  echo "JDWP tests are temporarily disabled in the read barrier configuration because of"
+  echo "a deadlock issue (b/25800335)."
+  exit 0
+fi
+
 art="/data/local/tmp/system/bin/art"
 art_debugee="sh /data/local/tmp/system/bin/art"
 args=$@
@@ -43,9 +55,11 @@
 vm_args=""
 # By default, we run the whole JDWP test suite.
 test="org.apache.harmony.jpda.tests.share.AllTests"
+host="no"
 
 while true; do
   if [[ "$1" == "--mode=host" ]]; then
+    host="yes"
     # Specify bash explicitly since the art script cannot, since it has to run on the device
     # with mksh.
     art="bash ${OUT_DIR-out}/host/linux-x86/bin/art"
@@ -114,7 +128,19 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test
+
+vogar_exit_status=$?
+
+echo "Killing stalled dalvikvm processes..."
+if [[ $host == "yes" ]]; then
+  pkill -9 -f /bin/dalvikvm
+else
+  adb shell pkill -9 -f /bin/dalvikvm
+fi
+echo "Done."
+
+exit $vogar_exit_status