Merge "MIPS32: Bit rotation intrinsics"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index dcde5ab..a17da34 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -248,6 +248,7 @@
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
+  compiler/linker/output_stream_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
   compiler/optimizing/dominator_test.cc \
@@ -266,7 +267,6 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/output_stream_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 564bd7e..348eabd 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -58,6 +58,10 @@
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
+	linker/buffered_output_stream.cc \
+	linker/file_output_stream.cc \
+	linker/output_stream.cc \
+	linker/vector_output_stream.cc \
 	linker/relative_patcher.cc \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
@@ -69,6 +73,7 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
@@ -99,16 +104,12 @@
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
-	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
-	file_output_stream.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	output_stream.cc \
-	vector_output_stream.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d67087e..9d3af16 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -77,9 +76,6 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 // given, to all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
@@ -2514,19 +2510,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_present_) {
     return false;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d90d610..1347b37 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -59,7 +59,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -398,12 +397,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 5464ed9..c3a3ca9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -29,6 +29,7 @@
 #include "common_runtime_test.h"
 #include "elf_builder.h"
 #include "gtest/gtest.h"
+#include "linker/file_output_stream.h"
 #include "os.h"
 
 namespace art {
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
new file mode 100644
index 0000000..a391e4d
--- /dev/null
+++ b/compiler/dwarf/method_debug_info.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+
+#include "dex_file.h"
+
+namespace art {
+class CompiledMethod;
+namespace dwarf {
+
+struct MethodDebugInfo {
+  const DexFile* dex_file_;
+  size_t class_def_index_;
+  uint32_t dex_method_index_;
+  uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+  bool deduped_;
+  uint32_t low_pc_;
+  uint32_t high_pc_;
+  CompiledMethod* compiled_method_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
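
Note: the new dwarf/method_debug_info.h turns the per-method debug record into a plain aggregate with no behaviour, so OatWriter can fill it in field by field and the DWARF/symbol writers only need a read-only view of the records. Below is a standalone sketch of that shape, using simplified stand-in types instead of the real DexFile and CompiledMethod; the counting helper is hypothetical. The deduped_ convention matches WriteDebugSymbols later in this change: only the first record at a given low_pc_ is emitted, later duplicates are skipped.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Stand-ins for the real ART types; illustrative only.
    struct DexFileStub {};
    struct CompiledMethodStub {};

    // Mirrors the shape of dwarf::MethodDebugInfo (code_item_ simplified to void*).
    struct MethodDebugInfoSketch {
      const DexFileStub* dex_file_;
      size_t class_def_index_;
      uint32_t dex_method_index_;
      uint32_t access_flags_;
      const void* code_item_;
      bool deduped_;
      uint32_t low_pc_;
      uint32_t high_pc_;
      CompiledMethodStub* compiled_method_;
    };

    // Consumers only need a read-only view (ART passes ArrayRef<const MethodDebugInfo>).
    size_t CountUniqueMethods(const MethodDebugInfoSketch* infos, size_t count) {
      size_t unique = 0;
      for (size_t i = 0; i != count; ++i) {
        if (!infos[i].deduped_) {  // Only the first record at a given address counts.
          ++unique;
        }
      }
      return unique;
    }

    int main() {
      DexFileStub dex;
      CompiledMethodStub method;
      std::vector<MethodDebugInfoSketch> infos = {
          {&dex, 0u, 7u, 0u, nullptr, /*deduped_=*/ false, 0x1000u, 0x1040u, &method},
          {&dex, 0u, 9u, 0u, nullptr, /*deduped_=*/ true, 0x1000u, 0x1040u, &method},
      };
      return CountUniqueMethods(infos.data(), infos.size()) == 1u ? 0 : 1;
    }
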
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 6e8dfd6..bb07cc2 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -23,10 +23,10 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "elf_utils.h"
-#include "file_output_stream.h"
 #include "leb128.h"
+#include "linker/error_delaying_output_stream.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -100,7 +100,7 @@
       header_.sh_entsize = entsize;
     }
 
-    virtual ~Section() {
+    ~Section() OVERRIDE {
       if (started_) {
         CHECK(finished_);
       }
@@ -120,8 +120,8 @@
       sections.push_back(this);
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
@@ -139,7 +139,7 @@
         CHECK_GT(header_.sh_size, 0u);
       } else {
         // Use the current file position to determine section size.
-        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
         CHECK_GE(file_offset, (off_t)header_.sh_offset);
         header_.sh_size = file_offset - header_.sh_offset;
       }
@@ -161,7 +161,7 @@
       } else {
         CHECK(started_);
         CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
-        return owner_->Seek(0, kSeekCurrent) - header_.sh_offset;
+        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
       }
     }
 
@@ -176,15 +176,20 @@
     bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
       CHECK(started_);
       CHECK(!finished_);
-      owner_->WriteFully(buffer, byte_count);
-      return true;
+      return owner_->stream_.WriteFully(buffer, byte_count);
     }
 
     // This function always succeeds to simplify code.
     // Use builder's Good() to check the actual status.
     off_t Seek(off_t offset, Whence whence) OVERRIDE {
       // Forward the seek as-is and trust the caller to use it reasonably.
-      return owner_->Seek(offset, whence);
+      return owner_->stream_.Seek(offset, whence);
+    }
+
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->stream_.Flush();
     }
 
     Elf_Word GetSectionIndex() const {
@@ -270,26 +275,24 @@
   };
 
   ElfBuilder(InstructionSet isa, OutputStream* output)
-    : isa_(isa),
-      output_(output),
-      output_good_(true),
-      output_offset_(0),
-      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
-      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
-      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
-      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
-      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-      strtab_(this, ".strtab", 0, kPageSize),
-      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
-      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
-      debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      shstrtab_(this, ".shstrtab", 0, 1),
-      virtual_address_(0) {
+      : isa_(isa),
+        stream_(output),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, kPageSize),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
     dynamic_.phdr_flags_ = PF_R | PF_W;
@@ -312,7 +315,7 @@
 
   // Encode patch locations as LEB128 list of deltas between consecutive addresses.
   // (exposed publicly for tests)
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
                                std::vector<uint8_t>* buffer) {
     buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
     uintptr_t address = 0;  // relative to start of section.
@@ -323,9 +326,9 @@
     }
   }
 
-  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
     std::vector<uint8_t> buffer;
-    EncodeOatPatches(*patch_locations, &buffer);
+    EncodeOatPatches(patch_locations, &buffer);
     std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
     s->Start();
     s->WriteFully(buffer.data(), buffer.size());
@@ -346,7 +349,7 @@
     // We do not know the number of headers until later, so
     // it is easiest to just reserve a fixed amount of space.
     int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Seek(size, kSeekSet);
+    stream_.Seek(size, kSeekSet);
     virtual_address_ += size;
   }
 
@@ -370,9 +373,14 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    Seek(section_headers_offset, kSeekSet);
-    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    stream_.Seek(section_headers_offset, kSeekSet);
+    stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Flush everything else before writing the program headers. This should prevent
+    // the OS from reordering writes, so that we don't end up with valid headers
+    // and partially written data if we suddenly lose power, for example.
+    stream_.Flush();
 
     // Write the initial file headers.
     std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
@@ -382,9 +390,10 @@
     elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-    Seek(0, kSeekSet);
-    WriteFully(&elf_header, sizeof(elf_header));
-    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Seek(0, kSeekSet);
+    stream_.WriteFully(&elf_header, sizeof(elf_header));
+    stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Flush();
   }
 
   // The running program does not have access to section headers
@@ -462,53 +471,15 @@
 
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
-    return output_good_;
+    return stream_.Good();
+  }
+
+  // Returns the builder's internal stream.
+  OutputStream* GetStream() {
+    return &stream_;
   }
 
  private:
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  void WriteFully(const void* buffer, size_t byte_count) {
-    if (output_good_) {
-      if (!output_->WriteFully(buffer, byte_count)) {
-        PLOG(ERROR) << "Failed to write " << byte_count
-                    << " bytes to ELF file at offset " << output_offset_;
-        output_good_ = false;
-      }
-    }
-    output_offset_ += byte_count;
-  }
-
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  off_t Seek(off_t offset, Whence whence) {
-    // We keep shadow copy of the offset so that we return
-    // the expected value even if the output stream failed.
-    off_t new_offset;
-    switch (whence) {
-      case kSeekSet:
-        new_offset = offset;
-        break;
-      case kSeekCurrent:
-        new_offset = output_offset_ + offset;
-        break;
-      default:
-        LOG(FATAL) << "Unsupported seek type: " << whence;
-        UNREACHABLE();
-    }
-    if (output_good_) {
-      off_t actual_offset = output_->Seek(offset, whence);
-      if (actual_offset == (off_t)-1) {
-        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
-                    << " whence=" << whence << " new_offset=" << new_offset;
-        output_good_ = false;
-      }
-      DCHECK_EQ(actual_offset, new_offset);
-    }
-    output_offset_ = new_offset;
-    return new_offset;
-  }
-
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
@@ -660,9 +631,7 @@
 
   InstructionSet isa_;
 
-  OutputStream* output_;
-  bool output_good_;  // True if all writes to output succeeded.
-  off_t output_offset_;  // Keep track of the current position in the stream.
+  ErrorDelayingOutputStream stream_;
 
   Section rodata_;
   Section text_;
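
Note: EncodeOatPatches and WritePatches now take ArrayRef<const uintptr_t>, but the encoding itself is unchanged: a ULEB128 list of deltas between consecutive, ascending patch locations, relative to the start of the section. A standalone sketch of that encoding follows; AppendUleb128 is a local stand-in for ART's EncodeUnsignedLeb128, not the real helper.

    #include <cstdint>
    #include <vector>

    // Emit one unsigned LEB128 value: 7 payload bits per byte, high bit set
    // on every byte except the last.
    void AppendUleb128(std::vector<uint8_t>* buffer, uint32_t value) {
      do {
        uint8_t byte = value & 0x7fu;
        value >>= 7;
        if (value != 0u) {
          byte |= 0x80u;  // More bytes follow.
        }
        buffer->push_back(byte);
      } while (value != 0u);
    }

    std::vector<uint8_t> EncodePatchDeltas(const std::vector<uintptr_t>& locations) {
      std::vector<uint8_t> buffer;
      buffer.reserve(locations.size() * 2);  // Same guess as the builder: ~2 bytes per entry.
      uintptr_t previous = 0u;               // Deltas are relative to the start of the section.
      for (uintptr_t location : locations) {
        AppendUleb128(&buffer, static_cast<uint32_t>(location - previous));
        previous = location;
      }
      return buffer;
    }
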
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..c5a0fd5 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
 
 class ElfWriter {
  public:
@@ -46,21 +49,26 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void SetBssSize(size_t bss_size) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+  // Get the ELF writer's stream. This stream can be used for writing data directly
+  // to a section after the section has been finished. When that's done, the user
+  // should Seek() back to the position where the stream was before this operation.
+  virtual OutputStream* GetStream() = 0;
+
+ protected:
+  ElfWriter() = default;
 };
 
 }  // namespace art
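
Note: ElfWriter is now a pure interface that callers drive step by step instead of handing everything to CompilerDriver::WriteElf. The sketch below condenses the call order from the updated image_test.cc in this change; the WriteOatElfFile wrapper itself is hypothetical and assumes the usual in-tree headers (elf_writer_quick.h, oat_writer.h, dwarf/method_debug_info.h, utils/array_ref.h).

    // Sketch of the expected call sequence; error handling reduced to booleans.
    bool WriteOatElfFile(InstructionSet isa,
                         const CompilerOptions* compiler_options,
                         OatWriter* oat_writer,
                         File* elf_file) {
      std::unique_ptr<ElfWriter> elf_writer =
          CreateElfWriterQuick(isa, compiler_options, elf_file);
      elf_writer->Start();

      OutputStream* rodata = elf_writer->StartRoData();
      if (!oat_writer->WriteRodata(rodata)) {
        return false;
      }
      elf_writer->EndRoData(rodata);

      OutputStream* text = elf_writer->StartText();
      if (!oat_writer->WriteCode(text)) {
        return false;
      }
      elf_writer->EndText(text);

      elf_writer->SetBssSize(oat_writer->GetBssSize());
      elf_writer->WriteDynamicSection();
      elf_writer->WriteDebugInfo(
          ArrayRef<const dwarf::MethodDebugInfo>(oat_writer->GetMethodDebugInfo()));
      elf_writer->WritePatchLocations(
          ArrayRef<const uintptr_t>(oat_writer->GetAbsolutePatchLocations()));
      return elf_writer->End();
    }

GetStream() is the escape hatch for writing into an already finished section; as the comment above says, callers are expected to Seek() back to the previous position afterwards.
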
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 81f574f..9dc6565 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -17,6 +17,7 @@
 #include "elf_writer_debug.h"
 
 #include <unordered_set>
+#include <vector>
 
 #include "base/casts.h"
 #include "base/stl_util.h"
@@ -25,6 +26,7 @@
 #include "dex_file-inl.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
+#include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
 #include "oat_writer.h"
@@ -203,7 +205,7 @@
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
   CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
         format == dwarf::DW_EH_FRAME_FORMAT);
@@ -233,7 +235,7 @@
     cfi_section->WriteFully(buffer.data(), buffer.size());
     buffer_address += buffer.size();
     buffer.clear();
-    for (const OatWriter::DebugInfo& mi : method_infos) {
+    for (const MethodDebugInfo& mi : method_infos) {
       if (!mi.deduped_) {  // Only one FDE per unique address.
         ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
         if (!opcodes.empty()) {
@@ -286,12 +288,13 @@
     header_section->WriteFully(binary_search_table.data(), binary_search_table.size());
     header_section->End();
   } else {
-    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
+    builder->WritePatches(".debug_frame.oat_patches",
+                          ArrayRef<const uintptr_t>(patch_locations));
   }
 }
 
 struct CompilationUnit {
-  std::vector<const OatWriter::DebugInfo*> methods_;
+  std::vector<const MethodDebugInfo*> methods_;
   size_t debug_line_offset_ = 0;
   uint32_t low_pc_ = 0xFFFFFFFFU;
   uint32_t high_pc_ = 0;
@@ -417,7 +420,7 @@
     // Write table into .debug_loc which describes location of dex register.
     // The dex register might be valid only at some points and it might
     // move between machine registers and stack.
-    void WriteRegLocation(const OatWriter::DebugInfo* method_info, uint16_t vreg,
+    void WriteRegLocation(const MethodDebugInfo* method_info, uint16_t vreg,
                           bool is64bitValue, uint32_t compilation_unit_low_pc) {
       using Kind = DexRegisterLocation::Kind;
       bool is_optimizing = method_info->compiled_method_->GetQuickCode().size() > 0 &&
@@ -740,7 +743,8 @@
 
   void End() {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_);
+    builder_->WritePatches(".debug_info.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_info_patches_));
     builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
     builder_->WriteSection(".debug_str", &debug_str_.Data());
     builder_->WriteSection(".debug_loc", &debug_loc_);
@@ -807,7 +811,7 @@
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
-    for (const OatWriter::DebugInfo* mi : compilation_unit.methods_) {
+    for (const MethodDebugInfo* mi : compilation_unit.methods_) {
       // Ignore function if we have already generated line table for the same address.
       // It would confuse the debugger and the DWARF specification forbids it.
       if (mi->deduped_) {
@@ -924,7 +928,8 @@
 
   void End() {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches", &debug_line_patches);
+    builder_->WritePatches(".debug_line.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_line_patches));
   }
 
  private:
@@ -934,11 +939,11 @@
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos) {
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
+  for (const MethodDebugInfo& mi : method_infos) {
     auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
     const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
     if (compilation_units.empty() || source_file != last_source_file) {
@@ -975,18 +980,18 @@
 // Explicit instantiations
 template void WriteCFISection<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteCFISection<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteDebugSections<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 template void WriteDebugSections<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
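
Note: switching the debug writers from const std::vector<OatWriter::DebugInfo>& to ArrayRef<const MethodDebugInfo> decouples them from OatWriter and from the container type: an ArrayRef is just a pointer plus a length over existing storage, so wrapping a vector is free. A minimal standalone view in that spirit (not the real utils/array_ref.h, which has more constructors and iterator support):

    #include <cstddef>
    #include <vector>

    template <typename T>
    class SpanSketch {
     public:
      SpanSketch(const T* data, size_t size) : data_(data), size_(size) {}
      explicit SpanSketch(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}

      const T* begin() const { return data_; }
      const T* end() const { return data_ + size_; }
      size_t size() const { return size_; }
      bool empty() const { return size_ == 0u; }

     private:
      const T* data_;
      size_t size_;
    };

    // Callers that used to take const std::vector<T>& can accept any contiguous
    // storage without copying:
    size_t CountNonZero(SpanSketch<int> values) {
      size_t n = 0;
      for (int v : values) {
        if (v != 0) {
          ++n;
        }
      }
      return n;
    }

    // Usage: no copy is made when passing the vector.
    //   std::vector<int> locations = {0, 4, 0, 8};
    //   size_t n = CountNonZero(SpanSketch<int>(locations));  // n == 2
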
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e58fd0a..9ed102f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,23 +17,22 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include <vector>
-
 #include "elf_builder.h"
 #include "dwarf/dwarf_constants.h"
 #include "oat_writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format);
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos);
+                        const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 5c059e1..e411496 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,18 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "utils.h"
 
 namespace art {
@@ -57,125 +56,193 @@
 constexpr bool kGenerateSingleArmMappingSymbol = true;
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+class ElfWriterQuick FINAL : public ElfWriter {
+ public:
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
+
+  void Start() OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void SetBssSize(size_t bss_size) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  virtual OutputStream* GetStream() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
+
+ private:
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
+};
+
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
+  }
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
-  std::unique_ptr<BufferedOutputStream> output_stream(
-      new BufferedOutputStream(new FileOutputStream(elf_file_)));
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(
-      new ElfBuilder<ElfTypes>(isa, output_stream.get()));
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
 
-  builder->Start();
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
 
-  auto* rodata = builder->GetRoData();
-  auto* text = builder->GetText();
-  auto* bss = builder->GetBss();
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
 
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
   rodata->Start();
-  if (!oat_writer->WriteRodata(rodata)) {
-    return false;
-  }
-  rodata->End();
+  return rodata;
+}
 
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
   text->Start();
-  if (!oat_writer->WriteCode(text)) {
-    return false;
-  }
-  text->End();
+  return text;
+}
 
-  if (oat_writer->GetBssSize() != 0) {
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
+  auto* bss = builder_->GetBss();
+  if (bss_size != 0u) {
     bss->Start();
-    bss->SetSize(oat_writer->GetBssSize());
+    bss->SetSize(bss_size);
     bss->End();
   }
-
-  builder->WriteDynamicSection(elf_file_->GetPath());
-
-  if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-    const auto& method_infos = oat_writer->GetMethodDebugInfo();
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder.get(), method_infos);
-    }
-  }
-
-  // Add relocation section for .text.
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
-  }
-
-  builder->End();
-
-  return builder->Good() && output_stream->Flush();
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  builder_->WriteDynamicSection(elf_file_->GetPath());
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    if (!method_infos.empty()) {
+      // Add methods to .symtab.
+      WriteDebugSymbols(builder_.get(), method_infos);
+      // Generate CFI (stack unwinding information).
+      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
+      // Write DWARF .debug_* sections.
+      dwarf::WriteDebugSections(builder_.get(), method_infos);
+    }
+  }
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
+  // Add relocation section for .text.
+  if (compiler_options_->GetIncludePatchInformation()) {
+    // Note that ElfWriter::Fixup will be called regardless and therefore
+    // we need to include oat_patches for debug sections unconditionally.
+    builder_->WritePatches(".text.oat_patches", patch_locations);
+  }
+}
+
+template <typename ElfTypes>
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
+
+  return builder_->Good();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
+  return builder_->GetStream();
+}
+
+template <typename ElfTypes>
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   bool generated_mapping_symbol = false;
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
 
-  if (method_info.empty()) {
+  if (method_infos.empty()) {
     return;
   }
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of a method is not marked deduped_, but the rest are.
   std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
     }
   }
 
   strtab->Start();
   strtab->Write("");  // strtab should start with empty string.
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
     }
 
-    uint32_t low_pc = it->low_pc_;
+    uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
+    low_pc += info.compiled_method_->CodeDelta();
     symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
                     true, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
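
Note: ElfWriterQuick<ElfTypes> is now local to this file and the 32-/64-bit decision happens once in CreateElfWriterQuick, replacing the kProduce64BitELFFiles switch removed from CompilerDriver::WriteElf. A standalone sketch of that dispatch pattern, with stand-in types instead of the real ElfTypes32/ElfTypes64 and ElfWriter:

    #include <cstddef>
    #include <cstdint>
    #include <memory>

    struct ElfTypes32Stub { using Addr = uint32_t; };
    struct ElfTypes64Stub { using Addr = uint64_t; };

    // Non-template interface exposed to callers.
    class WriterInterface {
     public:
      virtual ~WriterInterface() {}
      virtual size_t AddressSize() const = 0;
    };

    // Template implementation kept out of the public header.
    template <typename ElfTypes>
    class WriterImpl final : public WriterInterface {
     public:
      size_t AddressSize() const override { return sizeof(typename ElfTypes::Addr); }
    };

    // The real factory keys this decision off Is64BitInstructionSet(instruction_set).
    std::unique_ptr<WriterInterface> CreateWriter(bool is_64bit) {
      if (is_64bit) {
        return std::unique_ptr<WriterInterface>(new WriterImpl<ElfTypes64Stub>());
      }
      return std::unique_ptr<WriterInterface>(new WriterImpl<ElfTypes32Stub>());
    }
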
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..347d372 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,19 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index b413a9e..7cf774e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -101,7 +101,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6df1527..cda6240 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,9 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -32,7 +34,6 @@
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -92,12 +93,37 @@
                            /*compiling_boot_image*/true,
                            &timings,
                            &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      bool success = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(success);
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      success = elf_writer->End();
+
       ASSERT_TRUE(success);
     }
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 341742e..bf1fcdd 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -540,7 +540,10 @@
 }
 
 bool ImageWriter::AllocMemory() {
-  const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
+  const size_t length = RoundUp(image_objects_offset_begin_ +
+                                    GetBinSizeSum() +
+                                    intern_table_bytes_ +
+                                    class_table_bytes_,
                                 kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image",
@@ -1030,6 +1033,14 @@
           WalkFieldsInOrder(value);
         }
       }
+    } else if (h_obj->IsClassLoader()) {
+      // Register the class loader if it has a class table.
+      // The fake boot class loader should not get registered and we should end up with only one
+      // class loader.
+      mirror::ClassLoader* class_loader = h_obj->AsClassLoader();
+      if (class_loader->GetClassTable() != nullptr) {
+        class_loaders_.insert(class_loader);
+      }
     }
   }
 }
@@ -1154,10 +1165,26 @@
   }
 
   // Calculate how big the intern table will be after being serialized.
-  auto* const intern_table = Runtime::Current()->GetInternTable();
+  InternTable* const intern_table = runtime->GetInternTable();
   CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
   intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
 
+  // Write out the class table.
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  if (boot_image_space_ == nullptr) {
+    // Compiling the boot image, add null class loader.
+    class_loaders_.insert(nullptr);
+  }
+  if (!class_loaders_.empty()) {
+    CHECK_EQ(class_loaders_.size(), 1u) << "Should only have one real class loader in the image";
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      class_table_bytes_ += table->WriteToMemory(nullptr);
+    }
+  }
+
   // Note that image_end_ is left at end of used mirror object section.
 }
 
@@ -1199,6 +1226,12 @@
   auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
+  // Calculate the size of the class table section.
+  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  *class_table_section = ImageSection(cur_pos, class_table_bytes_);
+  cur_pos = class_table_section->End();
+  // Image end goes right before the start of the image bitmap.
+  const size_t image_end = static_cast<uint32_t>(cur_pos);
   // Finally bitmap section.
   const size_t bitmap_bytes = image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
@@ -1212,7 +1245,6 @@
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
   }
-  const size_t image_end = static_cast<uint32_t>(interned_strings_section->End());
   CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
       "Oat file should be right after the image.";
   // Create the header.
@@ -1323,23 +1355,48 @@
     }
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
+  FixupRootVisitor root_visitor(this);
+
   // Write the intern table into the image.
   const ImageSection& intern_table_section = image_header->GetImageSection(
       ImageHeader::kSectionInternedStrings);
-  InternTable* const intern_table = Runtime::Current()->GetInternTable();
-  uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr);
+  Runtime* const runtime = Runtime::Current();
+  InternTable* const intern_table = runtime->GetInternTable();
+  uint8_t* const intern_table_memory_ptr = image_->Begin() + intern_table_section.Offset();
+  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
   // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_table;
+  InternTable temp_intern_table;
   // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
   // the VisitRoots() will update the memory directly rather than the copies.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
-  temp_table.ReadFromMemory(memory_ptr);
-  CHECK_EQ(temp_table.Size(), intern_table->Size());
-  FixupRootVisitor visitor(this);
-  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
+  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
+  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+
+  // Write the class table(s) into the image.
+  ClassLinker* const class_linker = runtime->GetClassLinker();
+  const ImageSection& class_table_section = image_header->GetImageSection(
+      ImageHeader::kSectionClassTable);
+  uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  size_t class_table_bytes = 0;
+  for (mirror::ClassLoader* loader : class_loaders_) {
+    ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+    CHECK(table != nullptr);
+    uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
+    class_table_bytes += table->WriteToMemory(memory_ptr);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(memory_ptr);
+    // CHECK_EQ(temp_class_table.NumNonZygoteClasses(), table->NumNonZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
+  }
+  CHECK_EQ(class_table_bytes, class_table_bytes_);
 }
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1553,8 +1610,7 @@
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
         FixupDexCache(down_cast<mirror::DexCache*>(orig), down_cast<mirror::DexCache*>(copy));
-      } else if (klass->IsSubClass(down_cast<mirror::Class*>(
-          class_linker->GetClassRoot(ClassLinker::kJavaLangClassLoader)))) {
+      } else if (klass->IsClassLoaderClass()) {
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
         down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
@@ -1840,7 +1896,8 @@
                                 bin_slot_sizes_[kBinArtMethodDirty] +
                                 bin_slot_sizes_[kBinArtMethodClean] +
                                 bin_slot_sizes_[kBinDexCacheArray] +
-                                intern_table_bytes_;
+                                intern_table_bytes_ +
+                                class_table_bytes_;
   return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
 }
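
Note: the class table section follows the same measure-then-write protocol already used for the intern table: WriteToMemory(nullptr) during size calculation reserves the bytes, a second WriteToMemory(ptr) call serializes into the image, and ReadFromMemory plus VisitRoots then rewrites the embedded pointers to image addresses. A standalone sketch of the two-pass WriteToMemory convention with a stand-in table (pointer fixup omitted):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <utility>
    #include <vector>

    // Stand-in for ART's InternTable/ClassTable serialization interface.
    class TableSketch {
     public:
      explicit TableSketch(std::vector<uint32_t> entries) : entries_(std::move(entries)) {}

      // Returns the number of bytes needed; writes nothing if ptr is null.
      size_t WriteToMemory(uint8_t* ptr) const {
        const size_t bytes = sizeof(uint32_t) * (1u + entries_.size());  // count + entries
        if (ptr != nullptr) {
          const uint32_t count = static_cast<uint32_t>(entries_.size());
          std::memcpy(ptr, &count, sizeof(count));
          std::memcpy(ptr + sizeof(count), entries_.data(), sizeof(uint32_t) * entries_.size());
        }
        return bytes;
      }

     private:
      std::vector<uint32_t> entries_;
    };

    int main() {
      TableSketch table({1u, 2u, 3u});
      const size_t section_bytes = table.WriteToMemory(nullptr);    // Sizing pass.
      std::vector<uint8_t> section(section_bytes);
      const size_t written = table.WriteToMemory(section.data());   // Serialization pass.
      return written == section_bytes ? 0 : 1;  // Mirrors CHECK_EQ(class_table_bytes, class_table_bytes_).
    }
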
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 889cd10..386838f 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -72,7 +72,8 @@
         intern_table_bytes_(0u),
         image_method_array_(ImageHeader::kImageMethodsCount),
         dirty_methods_(0u),
-        clean_methods_(0u) {
+        clean_methods_(0u),
+        class_table_bytes_(0u) {
     CHECK_NE(image_begin, 0U);
     std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
     std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
@@ -453,6 +454,12 @@
   // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
   std::unordered_map<mirror::Class*, bool> prune_class_memo_;
 
+  // Class loaders with a class table to write out. Should only be one currently.
+  std::unordered_set<mirror::ClassLoader*> class_loaders_;
+
+  // Number of image class table bytes.
+  size_t class_table_bytes_;
+
   friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 2125c9a..d001495 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -170,18 +170,6 @@
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
-  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -190,13 +178,13 @@
   }
 
   // Do the compilation.
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 13754fd..73b0fac 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,9 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 #include "compiled_method.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 57018af..3d4c218 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,10 +20,10 @@
 #include "art_method.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
-#include "utils/arm64/assembler_arm64.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
+#include "utils/arm64/assembler_arm64.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc
similarity index 76%
rename from compiler/buffered_output_stream.cc
rename to compiler/linker/buffered_output_stream.cc
index 3ca518b..4c66c76 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/linker/buffered_output_stream.cc
@@ -20,18 +20,24 @@
 
 namespace art {
 
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
   }
@@ -42,6 +48,10 @@
 }
 
 bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
   bool success = true;
   if (used_ > 0) {
     success = out_->WriteFully(&buffer_[0], used_);
@@ -51,7 +61,7 @@
 }
 
 off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
+  if (!FlushBuffer()) {
     return -1;
   }
   return out_->Seek(offset, whence);
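
Note: BufferedOutputStream now distinguishes FlushBuffer(), which only drains the in-memory buffer into the wrapped stream, from the virtual Flush(), which additionally flushes the wrapped stream; the destructor and Seek() intentionally use only FlushBuffer(). A standalone sketch of that split, with a stand-in sink in place of OutputStream:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <memory>
    #include <vector>

    // Stand-in for the wrapped OutputStream.
    class SinkSketch {
     public:
      bool WriteFully(const void* buffer, size_t byte_count) {
        const uint8_t* bytes = static_cast<const uint8_t*>(buffer);
        data.insert(data.end(), bytes, bytes + byte_count);
        return true;
      }
      bool Flush() { flushed = true; return true; }

      std::vector<uint8_t> data;
      bool flushed = false;
    };

    class BufferedSketch {
     public:
      explicit BufferedSketch(std::unique_ptr<SinkSketch> out) : out_(std::move(out)) {}
      ~BufferedSketch() { FlushBuffer(); }  // Like the new destructor: drain, no sink flush.

      bool WriteFully(const void* buffer, size_t byte_count) {
        if (byte_count > kBufferSize) {
          return FlushBuffer() && out_->WriteFully(buffer, byte_count);  // Bypass the buffer.
        }
        if (used_ + byte_count > kBufferSize && !FlushBuffer()) {
          return false;
        }
        std::memcpy(buffer_ + used_, buffer, byte_count);
        used_ += byte_count;
        return true;
      }

      bool Flush() { return FlushBuffer() && out_->Flush(); }  // Also flushes the sink.

     private:
      bool FlushBuffer() {
        bool success = true;
        if (used_ > 0) {
          success = out_->WriteFully(buffer_, used_);
          used_ = 0;
        }
        return success;
      }

      static const size_t kBufferSize = 8 * 1024;
      std::unique_ptr<SinkSketch> out_;
      uint8_t buffer_[kBufferSize];
      size_t used_ = 0;
    };

The constructor change to std::unique_ptr<OutputStream> also makes explicit the ownership that the old raw pointer plus delete implied, which is why ElfWriterQuick now builds the chain with MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)).
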
diff --git a/compiler/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h
similarity index 65%
rename from compiler/buffered_output_stream.h
rename to compiler/linker/buffered_output_stream.h
index b447f41..a2eefbb 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/linker/buffered_output_stream.h
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-#define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+
+#include <memory>
 
 #include "output_stream.h"
 
@@ -25,26 +27,23 @@
 
 class BufferedOutputStream FINAL : public OutputStream {
  public:
-  explicit BufferedOutputStream(OutputStream* out);
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
 
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
+  ~BufferedOutputStream() OVERRIDE;
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
 
-  bool Flush();
+  bool Flush() OVERRIDE;
 
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  OutputStream* const out_;
+  bool FlushBuffer();
 
+  std::unique_ptr<OutputStream> const out_;
   uint8_t buffer_[kBufferSize];
-
   size_t used_;
 
   DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
@@ -52,4 +51,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
new file mode 100644
index 0000000..99410e4
--- /dev/null
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+// OutputStream wrapper that delays reporting an error until Flush().
+class ErrorDelayingOutputStream FINAL : public OutputStream {
+ public:
+  explicit ErrorDelayingOutputStream(OutputStream* output)
+      : OutputStream(output->GetLocation()),
+        output_(output),
+        output_good_(true),
+        output_offset_(0) { }
+
+  // This function always reports success, to simplify calling code.
+  // Use Good() to check the actual status of the output stream.
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to " << GetLocation() << " at offset " << output_offset_;
+        output_good_ = false;
+      }
+    }
+    output_offset_ += byte_count;
+    return true;
+  }
+
+  // This function always reports success, to simplify calling code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    // We keep a shadow copy of the offset so that we can return
+    // the expected value even if the output stream fails.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == static_cast<off_t>(-1)) {
+        PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
+  }
+
+  // Flush the output and return whether all operations have succeeded.
+  // Do nothing if we already have a pending error.
+  bool Flush() OVERRIDE {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
+  // Check (without flushing) whether all operations have succeeded so far.
+  bool Good() const {
+    return output_good_;
+  }
+
+ private:
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
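
The intended usage pattern for the new ErrorDelayingOutputStream is to issue a sequence of writes and seeks without checking each return value and to query the delayed status once at the end. A short sketch; the helper WriteAllSections is illustrative and not part of this change:

  #include <cstdint>
  #include <vector>

  #include "linker/error_delaying_output_stream.h"

  namespace art {

  // Illustrative only: write several buffers and report errors once.
  bool WriteAllSections(OutputStream* raw_out,
                        const std::vector<std::vector<uint8_t>>& sections) {
    ErrorDelayingOutputStream out(raw_out);
    for (const std::vector<uint8_t>& section : sections) {
      out.WriteFully(section.data(), section.size());  // Always returns true.
    }
    // Good() reports whether every write actually reached the wrapped stream;
    // Flush() additionally flushes it and surfaces any pending error.
    return out.Good() && out.Flush();
  }

  }  // namespace art
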
diff --git a/compiler/file_output_stream.cc b/compiler/linker/file_output_stream.cc
similarity index 94%
rename from compiler/file_output_stream.cc
rename to compiler/linker/file_output_stream.cc
index 3ee16f5..bbfbdfd 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/linker/file_output_stream.cc
@@ -33,4 +33,8 @@
   return lseek(file_->Fd(), offset, static_cast<int>(whence));
 }
 
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
 }  // namespace art
diff --git a/compiler/file_output_stream.h b/compiler/linker/file_output_stream.h
similarity index 72%
rename from compiler/file_output_stream.h
rename to compiler/linker/file_output_stream.h
index 9dfbd7f..f2d8453 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/linker/file_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_FILE_OUTPUT_STREAM_H_
-#define ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -27,11 +27,13 @@
  public:
   explicit FileOutputStream(File* file);
 
-  virtual ~FileOutputStream() {}
+  ~FileOutputStream() OVERRIDE {}
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   File* const file_;
@@ -41,4 +43,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream.cc b/compiler/linker/output_stream.cc
similarity index 100%
rename from compiler/output_stream.cc
rename to compiler/linker/output_stream.cc
diff --git a/compiler/output_stream.h b/compiler/linker/output_stream.h
similarity index 76%
rename from compiler/output_stream.h
rename to compiler/linker/output_stream.h
index 4d30b83..96a5f48 100644
--- a/compiler/output_stream.h
+++ b/compiler/linker/output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OUTPUT_STREAM_H_
-#define ART_COMPILER_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_
 
 #include <ostream>
 #include <string>
@@ -45,6 +45,14 @@
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
+  virtual bool Flush() = 0;
+
  private:
   const std::string location_;
 
@@ -53,4 +61,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream_test.cc b/compiler/linker/output_stream_test.cc
similarity index 71%
rename from compiler/output_stream_test.cc
rename to compiler/linker/output_stream_test.cc
index 6104ccd..84c76f2 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -19,6 +19,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -48,6 +49,7 @@
     EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
     CheckOffset(10);
     EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
   }
 
   void CheckTestOutput(const std::vector<uint8_t>& actual) {
@@ -77,9 +79,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -99,4 +99,39 @@
   CheckTestOutput(output);
 }
 
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
 }  // namespace art
diff --git a/compiler/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
similarity index 94%
rename from compiler/vector_output_stream.cc
rename to compiler/linker/vector_output_stream.cc
index 3d33673..f758005 100644
--- a/compiler/vector_output_stream.cc
+++ b/compiler/linker/vector_output_stream.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
-  : OutputStream(location), offset_(vector->size()), vector_(vector) {}
+    : OutputStream(location), offset_(vector->size()), vector_(vector) {}
 
 off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
   CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
diff --git a/compiler/vector_output_stream.h b/compiler/linker/vector_output_stream.h
similarity index 81%
rename from compiler/vector_output_stream.h
rename to compiler/linker/vector_output_stream.h
index 3c5877c..3210143 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/linker/vector_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-#define ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -29,9 +29,9 @@
  public:
   VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
 
-  virtual ~VectorOutputStream() {}
+  ~VectorOutputStream() OVERRIDE {}
 
-  bool WriteFully(const void* buffer, size_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
     if (static_cast<size_t>(offset_) == vector_->size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_->insert(vector_->end(), &start[0], &start[byte_count]);
@@ -45,7 +45,11 @@
     return true;
   }
 
-  off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
 
  private:
   void EnsureCapacity(off_t new_offset) {
@@ -62,4 +66,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 030451c..b8610d0 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -27,14 +27,17 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -134,11 +137,36 @@
                          /*compiling_boot_image*/false,
                          &timings,
                          &key_value_store);
-    return compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                      !kIsTargetBuild,
-                                      dex_files,
-                                      &oat_writer,
-                                      file);
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+
+    elf_writer->Start();
+
+    OutputStream* rodata = elf_writer->StartRoData();
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+    elf_writer->WriteDynamicSection();
+
+    ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+    elf_writer->WriteDebugInfo(method_infos);
+
+    ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+    elf_writer->WritePatchLocations(patch_locations);
+
+    return elf_writer->End();
   }
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
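
For readers skimming the oat_test.cc hunk above: the explicit ElfWriter sequence that replaces the old CompilerDriver::WriteElf() call follows a fixed ordering. Condensed, using only the calls that appear in this patch (error handling omitted):

  // Sketch of the call ordering exercised by the test code above.
  elf_writer->Start();
  OutputStream* rodata = elf_writer->StartRoData();
  oat_writer.WriteRodata(rodata);                     // .rodata payload.
  elf_writer->EndRoData(rodata);
  OutputStream* text = elf_writer->StartText();
  oat_writer.WriteCode(text);                         // .text payload.
  elf_writer->EndText(text);
  elf_writer->SetBssSize(oat_writer.GetBssSize());
  elf_writer->WriteDynamicSection();
  elf_writer->WriteDebugInfo(
      ArrayRef<const dwarf::MethodDebugInfo>(oat_writer.GetMethodDebugInfo()));
  elf_writer->WritePatchLocations(
      ArrayRef<const uintptr_t>(oat_writer.GetAbsolutePatchLocations()));
  bool success = elf_writer->End();
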
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 40a3f14..0087a0d 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,10 +31,12 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "linker/output_stream.h"
 #include "linker/relative_patcher.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
@@ -42,7 +44,6 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "type_lookup_table.h"
@@ -485,7 +486,7 @@
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
+        writer_->method_info_.push_back(dwarf::MethodDebugInfo {
             dex_file_,
             class_def_index_,
             it.GetMemberIndex(),
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7027434..6c46ebc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -38,6 +38,10 @@
 class TimingLogger;
 class TypeLookupTable;
 
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
+
 // OatHeader         variable length with count of D OatDexFiles
 //
 // OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
@@ -129,19 +133,7 @@
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
-
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
+  const std::vector<dwarf::MethodDebugInfo>& GetMethodDebugInfo() const {
     return method_info_;
   }
 
@@ -280,7 +272,7 @@
     DISALLOW_COPY_AND_ASSIGN(OatClass);
   };
 
-  std::vector<DebugInfo> method_info_;
+  std::vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index cca0baf..7dbfd7c 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -175,6 +176,24 @@
     return false;
   }
 
+  // Returns whether it is certain that this->bound > `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns whether it is certain that this->bound < `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greatest of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
@@ -252,157 +271,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +473,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for tranforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1111,7 +499,9 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@
         first_constant_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
+        has_deoptimization_on_constant_subscripts_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@
     }
   }
 
+  void Finish() {
+    // Preserve SSA structure which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,25 +576,6 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1;
-    InductionVarRange::Value v2;
-    bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
-  }
-
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1330,17 +721,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1364,17 +744,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1400,38 +769,34 @@
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
+    bool try_dynamic_bce = true;
 
     if (!index->IsIntConstant()) {
+      // Non-constant subscript.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
       // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // Disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant subscript.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
@@ -1439,7 +804,7 @@
       }
       if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
@@ -1450,7 +815,7 @@
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1485,11 +850,11 @@
           ValueRange(GetGraph()->GetArena(), lower, upper);
       GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      TryDynamicBCE(bounds_check);
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1508,7 +873,7 @@
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1555,7 +920,7 @@
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
       IfCondition cmp = cond->GetCondition();
@@ -1564,42 +929,11 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
       }
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1613,7 +947,7 @@
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1715,19 +1049,19 @@
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1742,7 +1076,7 @@
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1766,9 +1100,12 @@
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+      return;
+    }
+    // If this instruction was added by AddCompareWithDeoptimization(), narrow
+    // the range accordingly in subsequent basic blocks.
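+    // For example (illustrative values), a 'deoptimize(array.length <= c)' emitted by
+    // AddCompareWithDeoptimization() guarantees array.length >= c + 1 in the code it
+    // dominates, so constant indices up to c are known to be in bounds there.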
     HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
     HInstruction* instruction = less_than_or_equal->InputAt(0);
     if (instruction->IsArrayLength()) {
@@ -1782,6 +1119,35 @@
     }
   }
 
+  /**
+   * After null/bounds checks are eliminated, some invariant array references
+   * may be exposed that can be hoisted out of the loop to the preheader or,
+   * in combination with dynamic bce, to the deoptimization block.
+   *
+   * for (int i = 0; i < n; i++) {
+   *                                <-------+
+   *   for (int j = 0; j < n; j++)          |
+   *     a[i][j] = 0;               --a[i]--+
+   * }
+   *
+   * Note: this optimization is no longer applied after deoptimization on array references
+   * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
+   * those cases it would be unsafe to hoist array references across their deoptimization
+   * instruction inside a loop.
+   */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+      if (loop->IsLoopInvariant(array_get->InputAt(0), false) &&
+          loop->IsLoopInvariant(array_get->InputAt(1), false)) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          HoistToPreheaderOrDeoptBlock(loop, array_get);
+        }
+      }
+    }
+  }
+
   void AddCompareWithDeoptimization(HInstruction* array_length,
                                     HIntConstant* const_instr,
                                     HBasicBlock* block) {
@@ -1803,6 +1169,9 @@
     block->InsertInstructionBefore(cond, bounds_check);
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+    // Flag that this kind of deoptimization on array references with constant
+    // subscripts has occurred to prevent further hoisting of these references.
+    has_deoptimization_on_constant_subscripts_ = true;
   }
 
   void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1846,21 +1215,432 @@
     }
   }
 
+  /**
+   * Returns true if static range analysis based on induction variables can determine that the
+   * bounds check on the given array range is always satisfied by the computed index range. The
+   * output parameter try_dynamic_bce is set to false if OOB is certain.
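+   *
+   * For example, for 'for (int i = 0; i < a.length; i++) a[i]', the computed index range
+   * [0, a.length - 1] fits in the array range [0, a.length - 1], so the bounds check can
+   * be removed statically (assuming the loop is known to be finite).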
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HInstruction* context,
+                            HInstruction* index,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+    do {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+          return false;
+        }
+        // Use analysis for static bce only if loop is finite.
+        if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
+        }
+      }
+    } while (induction_range_.RefineOuter(&v1, &v2));
+    return false;
+  }
+
+  /**
+   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
+   * check dynamically by adding runtime tests that trigger a deoptimization in case the bounds
+   * would go out of range (we want to be rather certain of that, given the slowdown of
+   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
+   * bounds checks and related null checks removed.
+   */
+  void TryDynamicBCE(HBoundsCheck* instruction) {
+    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+    HInstruction* index = instruction->InputAt(0);
+    HInstruction* length = instruction->InputAt(1);
+    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+    bool needs_finite_test = false;
+    bool needs_taken_test = false;
+    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+        induction_range_.CanGenerateCode(
+            instruction, index, &needs_finite_test, &needs_taken_test) &&
+        CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
+      HInstruction* lower = nullptr;
+      HInstruction* upper = nullptr;
+      // Generate the following unsigned comparisons
+      //     if (lower > upper)   deoptimize;
+      //     if (upper >= length) deoptimize;
+      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+      //     if (upper >= length) deoptimize;
+      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
+      // iterations) and by not combining accesses (e.g. a[i], a[i-3], a[i+5], etc.), these tests
+      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
+      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+      if (lower != nullptr) {
+        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+      }
+      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      ReplaceInstruction(instruction, index);
+    }
+  }
+
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does the loop have early exits? If so, the full range may not be covered by the loop
+      // at runtime, and testing the range may trigger deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+        if (!block->Dominates(back_edge)) {
+          return false;
+        }
+      }
+      // Success!
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
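+   *
+   * For example, a loop body containing a 'break' or 'return' has a successor outside
+   * the loop and is therefore treated as an early-exit loop.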
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If the loop has already been analyzed for early exits, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // This is the first early-exit analysis for this loop. Since the analysis requires scanning
+    // the full loop body, its result is stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
+
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
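+   *
+   * For instance, for an 'a.length' inside the loop where 'a' is loop invariant, the null
+   * check on 'a' is turned into a deoptimization test and the array length itself is then
+   * hoisted to the preheader or deoptimization block.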
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(length, false)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreheaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(check, false)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsLoopInvariant(array, false)) {
+        // Generate: if (array == null) deoptimize;
+        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeopt(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
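+   *
+   * For instance, with the direct index 'a[i]' in such a loop, the generated test
+   * 'if (upper >= a.length) deoptimize' also bounds the loop index itself: passing it
+   * implies upper < a.length <= MAX_INT, so incrementing i past upper cannot wrap around.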
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we already forced the loop to be finite, allow dynamic bce directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction variable
+      // at this point) is the direct loop index (viz. a[i]), since then the runtime tests
+      // ensure the upper bound cannot cause an infinite loop.
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /** Inserts a deoptimization test. */
+  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Hoists instruction out of the loop to preheader or deoptimization block. */
+  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    HBasicBlock* preheader = loop->GetPreHeader();
+    HBasicBlock* block = preheader;
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      block = it->second;
+    }
+    // Hoist the instruction.
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
+
+  /**
+   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * The taken-test protects range analysis evaluation code to avoid any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   * Returns block in which deoptimizations/invariants can be put.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader), or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test) {
+      return loop->GetPreHeader();
+    } else {
+      auto it = taken_test_loop_.find(loop_id);
+      if (it != taken_test_loop_.end()) {
+        return it->second;
+      }
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, it jumps around the deoptimization block.
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+    return true_block;
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *             header
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+             !it2.Done();
+             it2.Advance()) {
+          HInstruction* user = it2.Current()->GetUser();
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, it2.Current()->GetIndex());
+          }
+        }
+        // Scan all environment uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+             !it2.Done();
+             it2.Advance()) {
+          HEnvironment* user = it2.Current()->GetUser();
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
+    switch (type) {
+      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
+
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
   // beneficial to add a compare instruction that has deoptimization fallback and
   // eliminate those bounds checks.
   bool need_to_revisit_block_;
 
+  // Flag that denotes whether deoptimization has occurred on array references
+  // with constant subscripts (see AddCompareWithDeoptimization()).
+  bool has_deoptimization_on_constant_subscripts_;
+
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1872,14 +1652,12 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
   // that value dominated by that instruction fits in that range. Range of that
   // value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   HBasicBlock* last_visited_block = nullptr;
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
@@ -1896,6 +1674,9 @@
     visitor.VisitBasicBlock(current);
     last_visited_block = current;
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..b9df686 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
       : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
@@ -34,6 +38,7 @@
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..dbeb1cc 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index d7754e8..8e75bdc 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -735,6 +735,79 @@
   }
 }
 
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable.
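+  // For example (illustrative names), for a 'super.foo()' call in class B extends A, the
+  // resolved method may be A.foo(), while the actual target is taken from the super class
+  // vtable of the compiling class at A.foo()'s vtable index.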
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time compilation),
+      // bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    uint16_t vtable_index = resolved_method->GetMethodIndex();
+    ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+        vtable_index, class_linker->GetImagePointerSize());
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      // could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -742,22 +815,18 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
   const char* descriptor = dex_file_->GetMethodShorty(method_idx);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
 
   // Remove the return type from the 'proto'.
   size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
+  if (invoke_type != kStatic) {  // instance call
     // One extra argument for 'this'.
     number_of_arguments++;
   }
 
   MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
 
   // Special handling for string init.
   int32_t string_init_offset = 0;
@@ -780,7 +849,7 @@
         method_idx,
         target_method,
         dispatch_info,
-        original_invoke_type,
+        invoke_type,
         kStatic /* optimized_invoke_type */,
         HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
     return HandleStringInit(invoke,
@@ -791,23 +860,16 @@
                             descriptor);
   }
 
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (resolved_method == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
                                                      dex_pc,
                                                      method_idx,
-                                                     original_invoke_type);
+                                                     invoke_type);
     return HandleInvoke(invoke,
                         number_of_vreg_arguments,
                         args,
@@ -817,21 +879,26 @@
                         nullptr /* clinit_check */);
   }
 
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
   HInvoke* invoke = nullptr;
 
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
     // By default, consider that the called method implicitly requires
     // an initialization check of its declaring method.
     HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
         = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -847,24 +914,26 @@
                                                 method_idx,
                                                 target_method,
                                                 dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
+                                                invoke_type,
+                                                invoke_type,
                                                 clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeVirtual(arena_,
                                          number_of_arguments,
                                          return_type,
                                          dex_pc,
                                          method_idx,
-                                         table_index);
+                                         resolved_method->GetMethodIndex());
   } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeInterface(arena_,
                                            number_of_arguments,
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           table_index);
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -962,23 +1031,18 @@
 
 HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* resolved_method,
       uint32_t method_idx,
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+          self, *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
   Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 5ada93f..c3979f3 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -305,8 +305,10 @@
 
   HClinitCheck* ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* method,
       uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Build a HNewInstance instruction.
   bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
@@ -315,6 +317,10 @@
   bool IsInitialized(Handle<mirror::Class> cls) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Try to resolve a method using the class linker. Return null if a method could
+  // not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a98d9c6..ac6b5e8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -724,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1922,10 +1924,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -5818,16 +5828,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5850,6 +5850,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5881,11 +5907,15 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
@@ -5943,12 +5973,16 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -5970,7 +6004,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5996,6 +6034,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -6107,6 +6167,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 89de4f8..193add2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,31 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
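+  // An illustrative post-patching sequence, assuming the linker resolved the offset
+  // to 0x12345678:
+  //   movw base_reg, #0x5678
+  //   movt base_reg, #0x1234
+  //   add  base_reg, base_reg, pc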
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
   // Generate a read barrier for a heap reference within `instruction`.
   //
   // A read barrier for an object reference read from the heap is
@@ -419,7 +444,12 @@
   void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
 
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -441,6 +471,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ac16268..04acd9d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3618,8 +3618,12 @@
 }
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -3630,11 +3634,10 @@
 
   DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
-  // However this is not required in practice, as this is an
   // intermediate/temporary reference and because the current
   // concurrent copying collector keeps the from-space memory
   // intact/accessible until the end of the marking phase (the
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 934f24b..bc5eb31 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2986,8 +2986,13 @@
 }
 
 void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
+
   GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
@@ -2995,8 +3000,7 @@
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1fc09a8..2fb87d3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1929,8 +1929,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(),
-                                    Location::RequiresRegister());
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 
   if (codegen_->IsBaseline()) {
@@ -1970,6 +1969,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -4151,12 +4155,16 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 534ee1c..4618be9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -802,12 +802,17 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
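GetOrCreateDexCacheArrayBase above uses the lower_bound/key_comp/hint-insert idiom so that each dex file gets exactly one HArmDexCacheArraysBase and the map is probed only once per call. Below is a minimal standalone version of the same idiom written against std::map (ArenaSafeMap is ART's arena-backed map; the names here are illustrative only).

// Minimal standalone version of the find-or-insert-with-hint idiom used by
// GetOrCreateDexCacheArrayBase. One lookup (lower_bound) serves both the
// "already present" check and the insertion hint.
#include <cassert>
#include <map>
#include <string>

struct Base { int element_offset = -1; };  // stand-in for HArmDexCacheArraysBase

Base* GetOrCreate(std::map<std::string, Base>& bases, const std::string& dex_file) {
  auto lb = bases.lower_bound(dex_file);
  if (lb != bases.end() && !bases.key_comp()(dex_file, lb->first)) {
    return &lb->second;  // already initialized for this dex file
  }
  // Not found: insert right before the lower bound, reusing it as a hint.
  auto it = bases.emplace_hint(lb, dex_file, Base{});
  return &it->second;
}

int main() {
  std::map<std::string, Base> bases;
  Base* a = GetOrCreate(bases, "core.dex");
  Base* b = GetOrCreate(bases, "core.dex");
  assert(a == b);          // one base per dex file
  assert(bases.size() == 1);
}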
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..015f910
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index b3b09d2..c16b872 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -739,7 +739,9 @@
   // created for constants which were untyped in DEX. Note that this test can be skipped for
   // a synthetic phi (indicated by lack of a virtual register).
   if (phi->GetRegNumber() != kNoRegNumber) {
-    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
       HPhi* other_phi = phi_it.Current()->AsPhi();
       if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
         if (phi->GetType() == other_phi->GetType()) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 48bcd10..e9fdb84 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -393,15 +393,18 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
     if (invoke->IsStatic()) {
       StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
     }
   }
 
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+  }
+
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
     StartAttributeStream("field_type") << field_access->GetFieldType();
   }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index b40ef5a..9d0cde7 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -119,6 +119,17 @@
   }
 }
 
+bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) {
+  Value v1 = RefineOuter(*min_val, /* is_min */ true);
+  Value v2 = RefineOuter(*max_val, /* is_min */ false);
+  if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) {
+    *min_val = v1;
+    *max_val = v2;
+    return true;
+  }
+  return false;
+}
+
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -202,6 +213,8 @@
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
     }
+  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
   } else if (is_min) {
     // Special case for finding minimum: minimum of trip-count in loop-body is 1.
     if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
@@ -404,6 +417,25 @@
   return Value();
 }
 
+InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) {
+  if (v.instruction != nullptr) {
+    HLoopInformation* loop =
+        v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    if (loop != nullptr) {
+      // Set up loop information.
+      bool in_body = true;  // use is always in body of outer loop
+      HInductionVarAnalysis::InductionInfo* info =
+          induction_analysis_->LookupInfo(loop, v.instruction);
+      HInductionVarAnalysis::InductionInfo* trip =
+          induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction());
+      // Try to refine "a x instruction + b" with outer loop range information on instruction.
+      return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)),
+                      Value(v.b_constant));
+    }
+  }
+  return v;
+}
+
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
@@ -425,9 +457,13 @@
     }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    // Determine what tests are needed.
+    // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+    // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
+    // code does not use the trip-count explicitly (since there could be an implicit relation
+    // between e.g. an invariant subscript and a not-taken condition).
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-    *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip);
+    *needs_taken_test = IsBodyTripCount(trip);
     // Code generation for taken test: generate the code when requested or otherwise analyze
     // if code generation is feasible when taken test is needed.
     if (taken_test != nullptr) {
@@ -512,10 +548,13 @@
             }
             break;
           case HInductionVarAnalysis::kFetch:
-            if (graph != nullptr) {
-              *result = info->fetch;  // already in HIR
+            if (info->fetch->GetType() == type) {
+              if (graph != nullptr) {
+                *result = info->fetch;  // already in HIR
+              }
+              return true;
             }
-            return true;
+            break;
           case HInductionVarAnalysis::kTripCountInLoop:
           case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
@@ -545,29 +584,42 @@
         }
         break;
       case HInductionVarAnalysis::kLinear: {
-          // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
-          // to avoid arithmetic wrap-around situations that are hard to guard against.
-          int32_t stride_value = 0;
-          if (GetConstant(info->op_a, &stride_value)) {
-            if (stride_value == 1 || stride_value == -1) {
-              const bool is_min_a = stride_value == 1 ? is_min : !is_min;
-              if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-                  GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-                if (graph != nullptr) {
-                  HInstruction* oper;
-                  if (stride_value == 1) {
-                    oper = new (graph->GetArena()) HAdd(type, opa, opb);
-                  } else {
-                    oper = new (graph->GetArena()) HSub(type, opb, opa);
-                  }
-                  *result = Insert(block, oper);
+        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+        // to avoid arithmetic wrap-around situations that are hard to guard against.
+        int32_t stride_value = 0;
+        if (GetConstant(info->op_a, &stride_value)) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
                 }
-                return true;
+                *result = Insert(block, oper);
               }
+              return true;
             }
           }
         }
         break;
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (extreme.is_known && extreme.a_constant == 0) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
       default:
         break;
     }
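The reformatted kLinear case above evaluates a linear induction a * i + b over the normalized range 0 <= i < TC, restricted to strides of +1/-1, and selects HAdd or HSub depending on the sign of the stride. The arithmetic it relies on is small enough to show directly; the helper below is a standalone illustration, not ART code.

// Standalone illustration of the bounds of a linear induction
// v = stride * i + b with normalized 0 <= i < trip_count and stride in {+1, -1}.
// With stride +1 the minimum is b and the maximum is b + (trip_count - 1);
// with stride -1 the roles flip, matching the HAdd/HSub selection above.
#include <cassert>

struct Range { int min; int max; };

Range LinearRange(int stride, int b, int trip_count) {
  assert(stride == 1 || stride == -1);
  assert(trip_count >= 1);
  int at_first = b;                              // i == 0
  int at_last = stride * (trip_count - 1) + b;   // i == trip_count - 1
  return stride == 1 ? Range{at_first, at_last} : Range{at_last, at_first};
}

int main() {
  Range up = LinearRange(+1, 4, 10);   // 4, 5, ..., 13
  assert(up.min == 4 && up.max == 13);
  Range down = LinearRange(-1, 4, 10); // 4, 3, ..., -5
  assert(down.min == -5 && down.max == 4);
}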
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7984871..71b0b1b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -68,6 +68,9 @@
                          /*out*/Value* max_val,
                          /*out*/bool* needs_finite_test);
 
+  /** Refines the values with induction of next outer loop. Returns true on change. */
+  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val);
+
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -149,6 +152,12 @@
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /**
+   * Returns refined value using induction of next outer loop or the input value if no
+   * further refinement is possible.
+   */
+  Value RefineOuter(Value val, bool is_min);
+
+  /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
    * With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
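RefineOuter is declared to return true on change so callers can apply it repeatedly, peeling one enclosing loop per call until the bounds stop changing (the unit tests below expect false once nothing is left to refine). The sketch that follows shows that fixed-point driver pattern with a deliberately simplified stand-in for Value and RefineOuter; it is illustrative only, not the real InductionVarRange API.

// Illustrative stand-in: a bound that still references `loop_depth` enclosing
// loops. Refining peels one level and adjusts the constant, returning true on
// change, mirroring the "returns true on change" contract of RefineOuter().
#include <cassert>

struct Value {
  int constant;
  int loop_depth;  // 0 means the bound no longer depends on any enclosing loop
};

bool Refine(Value* min_val, Value* max_val) {
  if (min_val->loop_depth == 0 && max_val->loop_depth == 0) {
    return false;  // nothing changed
  }
  if (min_val->loop_depth > 0) { min_val->loop_depth--; min_val->constant -= 1; }
  if (max_val->loop_depth > 0) { max_val->loop_depth--; max_val->constant += 1; }
  return true;
}

int main() {
  Value lo{0, 2};
  Value hi{10, 2};
  while (Refine(&lo, &hi)) {  // iterate to a fixed point, as a client of
  }                           // RefineOuter() would
  assert(lo.loop_depth == 0 && hi.loop_depth == 0);
  assert(lo.constant == -2 && hi.constant == 12);
}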
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index c2ba157..128b5bb 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -473,16 +473,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -498,16 +501,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -527,16 +533,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(parameter, 1, -1), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(parameter, 1, 0), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
@@ -597,16 +606,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0363f20..a4dcb3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -171,13 +171,37 @@
                                   const DexFile& dex_file,
                                   uint32_t referrer_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
     return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass());
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    // TODO: deal with proxy classes.
+    return DexFile::kDexNoIndex;
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    // Update the dex cache to ensure the class is in it; the generated code
+    // assumes it is. We update it here because other dex files might also
+    // load the class, and there is no guarantee that the dex cache of the
+    // class's own dex file will be updated.
+    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
+      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    }
+    return cls->GetDexTypeIndex();
+  } else {
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  }
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -192,6 +216,10 @@
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
     mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
@@ -210,53 +238,176 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    return TryInline(invoke_instruction, resolved_method);
+  }
+
+  // Check if we can statically find the method.
+  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+  if (actual_method != nullptr) {
+    return TryInline(invoke_instruction, actual_method);
+  }
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  // Under JIT, we should always know the caller.
+  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
+  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
+    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
+    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+    if (ic.IsUnitialized()) {
       VLOG(compiler) << "Interface or virtual call to "
                      << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
+                     << " is not hit and not inlined";
       return false;
-    }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
+    } else if (ic.IsMonomorphic()) {
+      MaybeRecordStat(kMonomorphicCall);
+      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+    } else if (ic.IsPolymorphic()) {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+    } else {
+      DCHECK(ic.IsMegamorphic());
       VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+                     << PrettyMethod(method_index, caller_dex_file)
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      invoke_instruction->GetDexPc());
+
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(load_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(field_get, load_class);
+  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed, and eventually propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
+  // TODO
+  VLOG(compiler) << "Unimplemented polymorphic inlining for "
+                 << PrettyMethod(resolved_method);
+  return false;
+}
+
+bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t method_index = FindMethodIndexIn(
+      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+  if (method_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to "
+                   << PrettyMethod(method)
+                   << " cannot be inlined because it is inaccessible to the caller";
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is too big to inline";
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -273,7 +424,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
     return false;
   }
 
@@ -284,7 +435,8 @@
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+                                 bool same_dex_file,
+                                 bool do_rtp) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -337,6 +489,7 @@
       invoke_type,
       graph_->IsDebuggable(),
       graph_->GetCurrentInstructionId());
+  callee_graph->SetArtMethod(resolved_method);
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -418,6 +571,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     outermost_graph_,
                      codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -529,9 +683,9 @@
   HNullConstant* null_constant = graph_->GetNullConstant();
   if (!null_constant->GetReferenceTypeInfo().IsValid()) {
     ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
     null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
   // Check the integrity of reference types and run another type propagation if needed.
@@ -550,14 +704,16 @@
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
 
-    // If the return type is a refinement of the declared type run the type propagation again.
-    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-    if (invoke_rti.IsStrictSupertypeOf(return_rti)
-        || (return_rti.IsExact() && !invoke_rti.IsExact())
-        || !return_replacement->CanBeNull()) {
-      ReferenceTypePropagation rtp_fixup(graph_, handles_);
-      rtp_fixup.Run();
+    if (do_rtp) {
+      // If the return type is a refinement of the declared type run the type propagation again.
+      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+      if (invoke_rti.IsStrictSupertypeOf(return_rti)
+          || (return_rti.IsExact() && !invoke_rti.IsExact())
+          || !return_replacement->CanBeNull()) {
+        ReferenceTypePropagation rtp_fixup(graph_, handles_);
+        rtp_fixup.Run();
+      }
     }
   }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0f6a945..7b9fb73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -27,11 +27,13 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
@@ -40,6 +42,7 @@
            OptimizingCompilerStats* stats,
            size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
@@ -54,10 +57,33 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
+
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
+  // reference type propagation can run after the inlining.
+  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         bool same_dex_file);
+                         bool same_dex_file,
+                         bool do_rtp = true);
 
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b01324e..8340811 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -384,7 +384,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..02befc0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -121,6 +121,8 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
         } else if (instruction->CanThrow()) {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b5ac773..9b26de4 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1177,6 +1177,59 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
   DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -2079,6 +2132,26 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
@@ -2088,7 +2161,8 @@
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
       return os << "none";
     default:
-      return os << "unknown:" << static_cast<int>(rhs);
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d5110a7..9d3c88c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
   // Returns an instruction with the opposite boolean value from 'cond'.
   // The instruction has been inserted into the graph, either as a constant, or
   // before cursor.
@@ -479,6 +482,11 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1095,7 +1103,12 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
@@ -1952,6 +1965,14 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -2449,11 +2470,15 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
@@ -3418,7 +3443,7 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
@@ -3432,7 +3457,7 @@
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
+        optimized_invoke_type_(optimized_invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
         dispatch_info_(dispatch_info) { }
@@ -3478,7 +3503,11 @@
   // platform-specific special input, such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    optimized_invoke_type_ = invoke_type;
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -3501,6 +3530,7 @@
   }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3526,7 +3556,7 @@
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
   // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
@@ -3599,7 +3629,7 @@
   void RemoveInputAt(size_t index);
 
  private:
-  const InvokeType invoke_type_;
+  InvokeType optimized_invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3609,6 +3639,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
@@ -5566,6 +5597,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
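The FOR_EACH_CONCRETE_INSTRUCTION_ARM change above relies on the X-macro pattern: listing M(ArmDexCacheArraysBase, Instruction) once lets the expansion sites in nodes.h (such as the Is/As type checks visible earlier in this hunk) pick up the ARM-only node when ART_ENABLE_CODEGEN_arm is defined. A minimal standalone demonstration of the pattern (not ART's actual macros):

// Minimal standalone illustration of the X-macro pattern: one list, many expansions.
#include <cassert>
#include <string>

#define FOR_EACH_DEMO_INSTRUCTION(M) \
  M(Add)                             \
  M(ArmDexCacheArraysBase)

// Expansion 1: an enum with one entry per instruction.
enum class Kind {
#define DECLARE_KIND(name) k##name,
  FOR_EACH_DEMO_INSTRUCTION(DECLARE_KIND)
#undef DECLARE_KIND
};

// Expansion 2: a name table generated from the same list.
const char* KindName(Kind kind) {
  switch (kind) {
#define NAME_CASE(name) case Kind::k##name: return #name;
    FOR_EACH_DEMO_INSTRUCTION(NAME_CASE)
#undef NAME_CASE
  }
  return "unknown";
}

int main() {
  assert(std::string(KindName(Kind::kArmDexCacheArraysBase)) == "ArmDexCacheArraysBase");
}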
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
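UpdateElementOffset above anchors the base at the lowest requested element offset, so every later access is base plus a non-negative displacement, which the comment notes is what the assemblers handle in a single load for small offsets. A standalone illustration of that bookkeeping:

// Standalone illustration of the "anchor the base at the lowest requested
// offset" heuristic: after all requests are seen, each element is addressed
// as base + a non-negative displacement.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct DexCacheBase {
  size_t element_offset = static_cast<size_t>(-1);
  void UpdateElementOffset(size_t element_offset_in) {
    element_offset = std::min(element_offset, element_offset_in);
  }
};

int main() {
  DexCacheBase base;
  std::vector<size_t> requested = {128, 64, 96};  // offsets of referenced elements
  for (size_t offset : requested) {
    base.UpdateElementOffset(offset);
  }
  assert(base.element_offset == 64);
  for (size_t offset : requested) {
    size_t displacement = offset - base.element_offset;  // never negative
    assert(displacement <= 64);
  }
}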
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8440813..3495603 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,10 @@
 #include <stdint.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm64
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
@@ -423,7 +427,7 @@
     return;
   }
   HInliner* inliner = new (graph->GetArena()) HInliner(
-    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -435,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
@@ -500,12 +515,13 @@
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
@@ -558,6 +574,7 @@
       licm,
       induction,
       bce,
+      fold3,  // evaluates code generated by dynamic bce
       simplify3,
       lse,
       dce2,
@@ -607,8 +624,6 @@
   stack_map.resize(codegen->ComputeStackMapsSize());
   codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
-
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -643,7 +658,6 @@
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -749,8 +763,8 @@
     ArtMethod* art_method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
     // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
     if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
       interpreter_metadata = art_method->GetQuickenedInfo();
     }
   }
@@ -847,6 +861,7 @@
                    dex_file,
                    dex_cache));
     if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
       if (run_optimizations_) {
         method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
@@ -857,7 +872,7 @@
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
       MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
     }
   }
 
@@ -933,6 +948,7 @@
   if (stack_map_data == nullptr) {
     return false;
   }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
   const void* code = code_cache->CommitCode(
       self,
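The new kThumb2/kArm case in RunArchOptimizations above follows the same driver pattern as the other pass groups: collect HOptimization pointers in an array and let RunOptimizations execute them in order. A standalone sketch of that pattern with placeholder class names (not ART's HOptimization API):

// Standalone sketch of the pass-driver pattern: collect polymorphic pass
// objects in an array and run them in order.
#include <cassert>
#include <cstddef>

struct Pass {
  virtual ~Pass() {}
  virtual void Run() = 0;
};

struct DexCacheArrayFixupsPass : Pass {
  int* counter;
  explicit DexCacheArrayFixupsPass(int* c) : counter(c) {}
  void Run() override { ++*counter; }  // the real pass rewrites the graph
};

void RunPasses(Pass* const* passes, size_t length) {
  for (size_t i = 0; i != length; ++i) {
    passes[i]->Run();
  }
}

int main() {
  int ran = 0;
  DexCacheArrayFixupsPass fixups(&ran);
  Pass* arm_passes[] = { &fixups };
  RunPasses(arm_passes, sizeof(arm_passes) / sizeof(arm_passes[0]));
  assert(ran == 1);
}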
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..6296eed 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,18 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +47,12 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
+  kInlinedMonomorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
   kLastStat
 };
 
@@ -66,20 +65,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +90,42 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCompiled : name = "Compiled"; break;
+      case kInlinedInvoke : name = "InlinedInvoke"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+      case kUnresolvedField : name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
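
For reference, the reworked Log() above prints a single compiled percentage with <iomanip> manipulators instead of assembling an ostringstream. A minimal standalone sketch of that formatting, with hypothetical counter values (not ART code):

#include <cstddef>
#include <iomanip>
#include <iostream>

int main() {
  size_t attempted = 1234;  // stands in for compile_stats_[kAttemptCompilation]
  size_t compiled = 1100;   // stands in for compile_stats_[kCompiled]
  float compiled_percent = compiled * 100.0f / attempted;
  std::cout << "Attempted compilation of " << attempted
            << " methods: " << std::fixed << std::setprecision(2)
            << compiled_percent << "% (" << compiled << ") compiled." << std::endl;
  return 0;
}

With these values the output is "Attempted compilation of 1234 methods: 89.14% (1100) compiled."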
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 808a1dc..b383f1e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -26,6 +26,15 @@
  public:
   explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
 
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
     BinaryFP(add);
@@ -72,7 +81,7 @@
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,22 +93,22 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializePCRelativeBasePointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block; it is moved to a better
+    // position later by MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer(insn);
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
@@ -111,7 +120,7 @@
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
-      InitializePCRelativeBasePointer(invoke);
+      InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
       invoke_static_or_direct->AddSpecialInput(base_);
@@ -133,6 +142,7 @@
 void PcRelativeFixups::Run() {
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
 
 }  // namespace x86
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0d05c49..dd34924 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -127,6 +127,87 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // This is the only input allowed to have a valid RTI because it is ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fixed-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Test that if we are going to set RTI from invalid to Object, that
+    // instruction did not have any typed instructions in its def-use chain
+    // and therefore its type could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -136,6 +217,7 @@
   }
 
   ProcessWorklist();
+  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -534,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -588,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -653,7 +741,9 @@
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
+  if (!parent_rti.IsValid()) {
+    return;
+  }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
@@ -665,8 +755,6 @@
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
   }
-
-  return;
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -683,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -711,6 +799,7 @@
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -770,7 +859,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,6 +57,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
+  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079..5e1d1d9 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@
   }
 
   // TODO: Avoid CompilerDriver.
-  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
   MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
   int vtable_idx;
   uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@
       invoke->GetDexPc(),
       false /* update_stats: already updated in builder */,
       true /* enable_devirtualization */,
-      &invoke_type,
+      &optimized_invoke_type,
       &target_method,
       &vtable_idx,
       &direct_code,
       &direct_method);
-  DCHECK(success);
-  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
-  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
-  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+  if (!success) {
+    // TODO: try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen.
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
 
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 7ad5b44..cdeb443 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2569,30 +2569,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
 
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 1de51a2..5ae2cc2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -832,11 +832,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9246c82..886295e 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -439,14 +439,14 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
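
For reference, all of the MovWMovTResults expectations above are 32-bit T3 encodings, matching the Thumb2Assembler::movw change earlier in this patch. A standalone sketch (not ART code) that reproduces a few of the listed machine words from the T3 bit fields:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Returns the 32-bit MOVW (encoding T3) instruction word, first halfword in the
// high 16 bits, as Emit32() expects.
static uint32_t EncodeMovW(uint32_t rd, uint16_t imm16) {
  uint32_t imm4 = (imm16 >> 12) & 0xf;
  uint32_t i    = (imm16 >> 11) & 0x1;
  uint32_t imm3 = (imm16 >> 8) & 0x7;
  uint32_t imm8 = imm16 & 0xff;
  return 0xf2400000u |  // B31 | B30 | B29 | B28 | B25 | B22
         (i << 26) | (imm4 << 16) | (imm3 << 12) | (rd << 8) | imm8;
}

int main() {
  assert(EncodeMovW(4, 0x0000) == 0xf2400400u);  // "f240 0400  movw r4, #0"
  assert(EncodeMovW(3, 0x1234) == 0xf2412334u);  // "f241 2334  movw r3, #4660"
  assert(EncodeMovW(9, 0xffff) == 0xf64f79ffu);  // "f64f 79ff  movw r9, #65535"
  std::printf("MOVW T3 encodings match the expected disassembly.\n");
  return 0;
}

The first halfword carries i and imm4, the second carries imm3, rd and imm8, which is why movw r3, #0x1234 disassembles as f241 2334.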
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 89c2a7c..a1485e4 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -55,8 +55,10 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -72,7 +74,6 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
@@ -494,6 +495,7 @@
       app_image_(false),
       boot_image_(false),
       is_host_(false),
+      image_writer_(nullptr),
       driver_(nullptr),
       dump_stats_(false),
       dump_passes_(false),
@@ -1408,8 +1410,36 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
+      std::unique_ptr<ElfWriter> elf_writer =
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      if (!oat_writer->WriteRodata(rodata)) {
+        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      if (!oat_writer->WriteCode(text)) {
+        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer->GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer->GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer->GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
         return false;
       }
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d6a8c9..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,11 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+// from kernel <include/linux/threads.h>
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -132,7 +137,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 94eb82b..5833129 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,6 +28,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -41,6 +42,8 @@
 #include "gc/space/space-inl.h"
 #include "image.h"
 #include "indenter.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -51,7 +54,6 @@
 #include "oat_file-inl.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -116,7 +118,7 @@
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
     builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
 
     builder_->Start();
@@ -162,7 +164,7 @@
 
     builder_->End();
 
-    return builder_->Good() && output_stream->Flush();
+    return builder_->Good();
   }
 
   void Walk(Callback callback) {
@@ -1662,6 +1664,8 @@
         ImageHeader::kSectionDexCacheArrays);
     const auto& intern_section = image_header_.GetImageSection(
         ImageHeader::kSectionInternedStrings);
+    const auto& class_table_section = image_header_.GetImageSection(
+        ImageHeader::kSectionClassTable);
     stats_.header_bytes = header_bytes;
     stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
     // Add padding between the field and method section.
@@ -1678,6 +1682,7 @@
     stats_.art_method_bytes += method_section.Size();
     stats_.dex_cache_arrays_bytes += dex_cache_arrays_section.Size();
     stats_.interned_strings_bytes += intern_section.Size();
+    stats_.class_table_bytes += class_table_section.Size();
     stats_.Dump(os, indent_os);
     os << "\n";
 
@@ -2068,6 +2073,7 @@
     size_t art_method_bytes;
     size_t dex_cache_arrays_bytes;
     size_t interned_strings_bytes;
+    size_t class_table_bytes;
     size_t bitmap_bytes;
     size_t alignment_bytes;
 
@@ -2099,6 +2105,7 @@
           art_method_bytes(0),
           dex_cache_arrays_bytes(0),
           interned_strings_bytes(0),
+          class_table_bytes(0),
           bitmap_bytes(0),
           alignment_bytes(0),
           managed_code_bytes(0),
@@ -2261,6 +2268,7 @@
                                   "art_method_bytes       =  %8zd (%2.0f%% of art file bytes)\n"
                                   "dex_cache_arrays_bytes =  %8zd (%2.0f%% of art file bytes)\n"
                                   "interned_string_bytes  =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "class_table_bytes      =  %8zd (%2.0f%% of art file bytes)\n"
                                   "bitmap_bytes           =  %8zd (%2.0f%% of art file bytes)\n"
                                   "alignment_bytes        =  %8zd (%2.0f%% of art file bytes)\n\n",
                                   header_bytes, PercentOfFileBytes(header_bytes),
@@ -2271,11 +2279,14 @@
                                   PercentOfFileBytes(dex_cache_arrays_bytes),
                                   interned_strings_bytes,
                                   PercentOfFileBytes(interned_strings_bytes),
+                                  class_table_bytes, PercentOfFileBytes(class_table_bytes),
                                   bitmap_bytes, PercentOfFileBytes(bitmap_bytes),
                                   alignment_bytes, PercentOfFileBytes(alignment_bytes))
             << std::flush;
-        CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes +
-                 dex_cache_arrays_bytes + interned_strings_bytes + bitmap_bytes + alignment_bytes);
+        CHECK_EQ(file_bytes,
+                 header_bytes + object_bytes + art_field_bytes + art_method_bytes +
+                 dex_cache_arrays_bytes + interned_strings_bytes + class_table_bytes +
+                 bitmap_bytes + alignment_bytes);
       }
 
       os << "object_bytes breakdown:\n";
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 3d9f7dc..7d2d899 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -526,6 +526,21 @@
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
 
+void PatchOat::PatchClassTable(const ImageHeader* image_header) {
+  const auto& section = image_header->GetImageSection(ImageHeader::kSectionClassTable);
+  // Note that we require that ReadFromMemory does not make an internal copy of the
+  // elements; the temporary table below points directly into the image memory.
+  // This also relies on visiting the roots without any verification, which could
+  // fail after we update the roots to be the image addresses.
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  ClassTable temp_table;
+  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  FixupRootVisitor visitor(this);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&visitor, RootInfo(kRootUnknown));
+  temp_table.VisitRoots(buffered_visitor);
+}
+
+
 class RelocatedPointerVisitor {
  public:
   explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
@@ -606,6 +621,7 @@
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
   PatchInternedStrings(image_header);
+  PatchClassTable(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 0915014..38bd865 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -116,6 +116,8 @@
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchClassTable(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f7ed812..238d9f3 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -106,17 +106,16 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
@@ -149,8 +148,7 @@
         mirror::Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
           ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
+          if (HasSameNameAndSignature(interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
             result = interface_method;
             break;
           }
@@ -158,8 +156,9 @@
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(sizeof(void*))->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(sizeof(void*))));
   return result;
 }
 
@@ -299,7 +298,9 @@
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
         mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
+        bool from_code = false;
+        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
+        CHECK(!from_code);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
 
@@ -308,7 +309,7 @@
         if (pending_exception != nullptr) {
           self->SetException(pending_exception);
         }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5a2d6c3..8efad88 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -263,6 +263,9 @@
   mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature of the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 95baa82..fc1a52f 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -236,7 +236,7 @@
 
   // Returns how large the table is after being written. If target is null, then no writing happens
   // but the size is still returned. Target must be 8 byte aligned.
-  size_t WriteToMemory(uint8_t* ptr) {
+  size_t WriteToMemory(uint8_t* ptr) const {
     size_t offset = 0;
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
@@ -457,7 +457,7 @@
   }
 
   // Make sure that everything reinserts in the right spot. Returns the number of errors.
-  size_t Verify() {
+  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
     size_t errors = 0;
     for (size_t i = 0; i < num_buckets_; ++i) {
       T& element = data_[i];
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 2dd2a83..879544f 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1017,6 +1017,15 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
+  const ImageHeader& header = space->GetImageHeader();
+  const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
+  if (section.Size() > 0u) {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
+    class_table->ReadFromMemory(space->Begin() + section.Offset());
+    dex_cache_boot_image_class_lookup_required_ = false;
+  }
+
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
@@ -2786,9 +2795,11 @@
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
+
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(image_space);
   std::string temp;
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 3ed1c95..4656b74 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -115,7 +115,7 @@
   return false;
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
     const {
   std::string temp;
   return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
@@ -133,7 +133,7 @@
   return a.Read()->DescriptorEquals(descriptor);
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
@@ -148,4 +148,25 @@
   return true;
 }
 
+size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  size_t ret = 0;
+  for (const ClassSet& set : classes_) {
+    uint8_t* address = (ptr != nullptr) ? ptr + ret : nullptr;
+    ret += set.WriteToMemory(address);
+    // Sanity check: in debug builds, read the set we just wrote back and verify it.
+    if (kIsDebugBuild && ptr != nullptr) {
+      size_t read_count;
+      ClassSet class_set(address, /*make copy*/false, &read_count);
+      class_set.Verify();
+    }
+  }
+  return ret;
+}
+
+size_t ClassTable::ReadFromMemory(uint8_t* ptr) {
+  size_t read_count = 0;
+  classes_.insert(classes_.begin(), ClassSet(ptr, /*make copy*/false, &read_count));
+  return read_count;
+}
+
 }  // namespace art
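
ClassTable::WriteToMemory above follows the hash_set.h convention that a null target pointer means "measure only", so a caller can compute the section size first and then write into a buffer of exactly that size. A minimal standalone sketch of that two-pass pattern with a hypothetical payload (not ART code):

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical serializer: if ptr is null, nothing is written but the required
// number of bytes is still returned, mirroring HashSet/ClassTable::WriteToMemory.
static size_t WriteToMemory(uint8_t* ptr, const std::vector<uint64_t>& values) {
  size_t offset = 0;
  uint64_t count = values.size();
  if (ptr != nullptr) {
    std::memcpy(ptr + offset, &count, sizeof(count));
  }
  offset += sizeof(count);
  for (uint64_t v : values) {
    if (ptr != nullptr) {
      std::memcpy(ptr + offset, &v, sizeof(v));
    }
    offset += sizeof(v);
  }
  return offset;
}

int main() {
  std::vector<uint64_t> values = {1, 2, 3};
  size_t size = WriteToMemory(nullptr, values);  // First pass: measure.
  std::vector<uint8_t> buffer(size);
  WriteToMemory(buffer.data(), values);          // Second pass: write.
  return 0;
}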
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 002bb56..219e2c6 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -104,17 +104,26 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  size_t WriteToMemory(uint8_t* ptr) const
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t ReadFromMemory(uint8_t* ptr)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   class ClassDescriptorHashEquals {
    public:
+    // uint32_t for cross compilation.
+    uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
         NO_THREAD_SAFETY_ANALYSIS;;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+    // uint32_t for cross compilation.
+    uint32_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 32e77b7..51f57c3 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,10 +69,25 @@
   return alloc_record_count;
 }
 
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      sizeof(void*));
+  }
+}
+
 class Breakpoint : public ValueObject {
  public:
   Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
-    : method_(method),
+    : method_(GetCanonicalMethod(method)),
       dex_pc_(dex_pc),
       deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
@@ -99,6 +114,12 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed in method should be considered the
+  // same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
   ArtMethod* method_;
@@ -306,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -1282,9 +1303,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -2763,7 +2784,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3214,7 +3235,7 @@
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3231,7 +3252,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3274,12 +3295,15 @@
 
   if (first_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // inlined or default, we deoptimize everything; otherwise we deoptimize only this method. We
+    // deoptimize for default methods because we do not know everywhere their copies are used, and
+    // it is possible some of those copies could be inlined or otherwise missed.
+    // TODO: Deoptimizing on default methods might not be necessary in all cases.
     // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
     // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    bool need_full_deoptimization = m->IsDefault() || IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining or copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
@@ -3359,7 +3383,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..dccb1da 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -598,8 +598,12 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+      // The super class does not have the method.
+      return nullptr;
+    }
+    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..c019cae 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -52,7 +52,7 @@
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
 
   QuickExceptionHandler exception_handler(self, true);
   exception_handler.DeoptimizeSingleFrame();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index abf9ac4..2c8ed88 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -685,7 +685,9 @@
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +714,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -754,7 +756,8 @@
   if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
@@ -1019,15 +1022,21 @@
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
-      } else {
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+      } else {
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+            called->GetMethodIndex(), sizeof(void*));
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..7d00094 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1526,10 +1526,9 @@
   }
 }
 
+// The function below may be called by the mutator thread itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1543,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The bracket-index lock taken above guards the thread-local free list against racing
+      // with the GC thread, which merges the bulk free list into the thread-local free list
+      // in BulkFree. While the run is still marked thread-local, the GC thread updates the
+      // thread-local free list on our behalf in BulkFree, and that list is merged into the
+      // run's free list either when the run becomes full or when the run is revoked here,
+      // so the free list is already up to date. Once the run is no longer thread-local, the
+      // GC thread merges the bulk free list itself on the next BulkFree. Thus there is no
+      // need to merge the bulk free list into the free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
diff --git a/runtime/image.cc b/runtime/image.cc
index 1bc19ff..2eac3fb 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 555cf5d..a16f3c9 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -170,6 +170,7 @@
     kSectionArtMethods,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
+    kSectionClassTable,
     kSectionImageBitmap,
     kSectionCount,  // Number of elements in enum.
   };
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index bc2c197..264cd2c 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1062,7 +1062,9 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d686f74..871fad7 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,10 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -408,7 +411,7 @@
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +422,16 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      DCHECK(instr->IsInvoke());
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+    } else {
+      // Nothing to do: the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +440,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Deoptimization of the following shadow frames must go past the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
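
The resume pc logic above distinguishes deoptimizations requested by compiled code (re-execute the same dex pc) from debugger or full-stack deoptimizations (skip past the invoke that already ran). A minimal sketch of that decision, assuming a 3-code-unit invoke in the 35c format (illustrative only, not ART code):

#include <cstdint>
#include <cstdio>

static uint32_t ResumeDexPc(uint32_t dex_pc, bool from_code) {
  constexpr uint32_t kInvoke35cSizeInCodeUnits = 3;  // assumed instruction size
  if (from_code) {
    // Compiled code explicitly requested deoptimization at this dex pc; resume there.
    return dex_pc;
  }
  // The invoke already executed, so resume at the instruction after it.
  return dex_pc + kInvoke35cSizeInCodeUnits;
}

int main() {
  std::printf("from code:  resume at 0x%x\n", ResumeDexPc(0x10, /* from_code */ true));   // 0x10
  std::printf("full deopt: resume at 0x%x\n", ResumeDexPc(0x10, /* from_code */ false));  // 0x13
  return 0;
}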
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..8e7f3da 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -37,7 +37,8 @@
                                        mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 27a0e2d..92aa86e 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -142,11 +142,24 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if it is supposed to run deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    return false;
+  }
+
+  if (!code_cache_->NotifyCompilationOf(method, self)) {
+    return false;
+  }
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  code_cache_->DoneCompiling(method, self);
+  return success;
 }
 
 void Jit::CreateThreadPool() {
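
The new CompileMethod flow above rejects methods that have breakpoints or are (individually or globally) deoptimized, and then brackets the actual compilation between NotifyCompilationOf and DoneCompiling so the code cache knows a compilation is in flight. A simplified, self-contained sketch of that bracket pattern, with a stand-in cache type instead of the real JitCodeCache API:

    #include <mutex>
    #include <unordered_set>

    // Stand-in for the "is this method being compiled" bookkeeping (illustrative only).
    class SketchCodeCache {
     public:
      bool NotifyCompilationOf(void* method) {
        std::lock_guard<std::mutex> lock(mutex_);
        // Refuse if another thread already claimed this method.
        return in_flight_.insert(method).second;
      }
      void DoneCompiling(void* method) {
        std::lock_guard<std::mutex> lock(mutex_);
        in_flight_.erase(method);
      }
     private:
      std::mutex mutex_;
      std::unordered_set<void*> in_flight_;
    };

    bool CompileMethodSketch(SketchCodeCache* cache, void* method,
                             bool has_breakpoint, bool is_deoptimized,
                             bool (*do_compile)(void*)) {
      if (has_breakpoint || is_deoptimized) {
        return false;  // Mirrors the early-outs added in the hunk above.
      }
      if (!cache->NotifyCompilationOf(method)) {
        return false;  // Already compiled, or someone else is compiling it.
      }
      bool success = do_compile(method);
      cache->DoneCompiling(method);  // Always paired with a successful Notify.
      return success;
    }
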
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 804d69f..3342e92 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -536,7 +536,9 @@
       instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
+      if (!info->IsMethodBeingCompiled()) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+      }
     }
   }
 
@@ -577,12 +579,17 @@
       }
     }
 
-    // Free all profiling info.
-    for (ProfilingInfo* info : profiling_infos_) {
-      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
-    }
-    profiling_infos_.clear();
+    void* data_mspace = data_mspace_;
+    // Free all profiling infos of methods that were not being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [data_mspace] (ProfilingInfo* info) {
+        if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
 
     live_bitmap_.reset(nullptr);
     has_done_one_collection_ = true;
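
The collection hunk above now keeps the ProfilingInfo of any method that is being compiled and frees the rest, by letting the std::remove_if predicate both free the object and report it as removable. The erase/remove-with-side-effect pattern in isolation, with std::free standing in for mspace_free and a plain flag standing in for the "method kept its profiling info" check:

    #include <algorithm>
    #include <cstdlib>
    #include <vector>

    struct SketchInfo {
      bool still_in_use;  // Plays the role of "the method still points at this ProfilingInfo".
    };

    // Assumes the SketchInfo objects were malloc-allocated, as the real ones come from an mspace.
    void SweepProfilingInfos(std::vector<SketchInfo*>* infos) {
      auto kept_end = std::remove_if(infos->begin(), infos->end(), [](SketchInfo* info) {
        if (!info->still_in_use) {
          std::free(info);  // Free inside the predicate, as the hunk does with mspace_free.
          return true;      // Then drop the (now dangling) pointer from the vector.
        }
        return false;       // Keep entries whose methods are still being compiled.
      });
      infos->erase(kept_end, infos->end());
    }
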
@@ -643,7 +650,7 @@
                                                       ArtMethod* method,
                                                       const std::vector<uint32_t>& entries) {
   size_t profile_info_size = RoundUp(
-      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
   ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
@@ -694,5 +701,25 @@
   MutexLock mu(Thread::Current(), lock_);
   return last_update_time_ns_;
 }
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
+  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr || info->IsMethodBeingCompiled()) {
+    return false;
+  }
+  info->SetIsMethodBeingCompiled(true);
+  return true;
+}
+
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info->IsMethodBeingCompiled());
+  info->SetIsMethodBeingCompiled(false);
+}
+
 }  // namespace jit
 }  // namespace art
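
NotifyCompilationOf and DoneCompiling above piggyback on the method's ProfilingInfo: a method is only compiled if its entry point is not already in the cache and it has a ProfilingInfo that is not yet marked as being compiled. A condensed sketch of that decision order, with simplified stand-in types instead of ArtMethod/ProfilingInfo and with the cache lock elided:

    #include <cstdint>

    struct SketchProfilingInfo {
      bool being_compiled = false;
    };

    struct SketchMethod {
      const void* entry_point = nullptr;
      SketchProfilingInfo* profiling_info = nullptr;  // Null until the method is warm.
    };

    bool ContainsPcSketch(const void* begin, const void* end, const void* pc) {
      auto b = reinterpret_cast<std::uintptr_t>(begin);
      auto e = reinterpret_cast<std::uintptr_t>(end);
      auto p = reinterpret_cast<std::uintptr_t>(pc);
      return b <= p && p < e;
    }

    // Mirrors the decision order in JitCodeCache::NotifyCompilationOf above;
    // the real code holds the cache lock around the flag update.
    bool NotifyCompilationOfSketch(SketchMethod* m, const void* cache_begin, const void* cache_end) {
      if (ContainsPcSketch(cache_begin, cache_end, m->entry_point)) {
        return false;  // Entry point already lives in the JIT code cache.
      }
      SketchProfilingInfo* info = m->profiling_info;
      if (info == nullptr || info->being_compiled) {
        return false;  // Not warm yet, or another thread is compiling it.
      }
      info->being_compiled = true;
      return true;
    }
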
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index acd7c62..4032c7b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -66,6 +66,14 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2e52b1b..dcb346c 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -54,28 +54,29 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return true;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object in the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
   return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
   DCHECK(cache != nullptr);
+  return cache;
+}
 
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b13a315..ddaf02f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,6 +25,7 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
 
 namespace jit {
 class JitCodeCache;
@@ -34,6 +35,49 @@
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    return classes_[0].Read();
+  }
+
+  bool IsUnitialized() const {
+    return classes_[0].IsNull();
+  }
+
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+ private:
+  static constexpr uint16_t kIndividualCacheSize = 5;
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
@@ -67,44 +111,24 @@
     return method_;
   }
 
+  InlineCache* GetInlineCache(uint32_t dex_pc);
+
+  bool IsMethodBeingCompiled() const {
+    return is_method_being_compiled_;
+  }
+
+  void SetIsMethodBeingCompiled(bool value) {
+    is_method_being_compiled_ = value;
+  }
+
  private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
-
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
-
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
       : number_of_inline_caches_(entries.size()),
-        method_(method) {
+        method_(method),
+        is_method_being_compiled_(false) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+      cache_[i].dex_pc_ = entries[i];
     }
   }
 
@@ -114,6 +138,11 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
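
The InlineCache class hoisted out above classifies a call site purely by how many of its kIndividualCacheSize class slots are filled: none means uninitialized, exactly one monomorphic, some but not all polymorphic, and all of them megamorphic. A stand-alone sketch of the same classification over a plain array of class pointers, assuming (as AddInvokeInfo guarantees by filling the first free slot) that non-null slots are contiguous from index 0; the real code reads GcRoots under the mutator lock:

    #include <cstddef>

    enum class CallSiteKind { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

    // classes[0..num_slots) holds the receiver classes observed at one call site;
    // empty slots are null and filled slots are contiguous from index 0.
    CallSiteKind Classify(const void* const* classes, std::size_t num_slots) {
      // Mirrors the DCHECKs above: at least 3 slots are needed to tell the cases apart.
      if (classes[0] == nullptr) return CallSiteKind::kUninitialized;
      if (classes[1] == nullptr) return CallSiteKind::kMonomorphic;
      if (classes[num_slots - 1] == nullptr) return CallSiteKind::kPolymorphic;
      return CallSiteKind::kMegamorphic;
    }

With kIndividualCacheSize == 5, two to four observed classes therefore count as polymorphic and five as megamorphic.
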
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 649df5f..d1687d7 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -2210,4 +2210,55 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
+
 }  // namespace art
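
The new test locks an object through a global reference, detaches the thread, re-attaches, and asserts the monitor is no longer held, i.e. that DetachCurrentThread releases monitors the detaching thread still owns. From an embedder's point of view the exercised sequence looks roughly like the sketch below (standard JNI invocation API; error handling and CheckJNI considerations elided):

    #include <jni.h>

    // 'vm' is a created JavaVM, 'env' the current thread's JNIEnv, and 'global_ref'
    // a global reference created earlier with NewGlobalRef.
    void LockThenDetachSketch(JavaVM* vm, JNIEnv* env, jobject global_ref) {
      env->MonitorEnter(global_ref);  // This thread now owns the monitor.
      vm->DetachCurrentThread();      // Monitors still held by the thread are released here.

      JNIEnv* new_env = nullptr;
      vm->AttachCurrentThread(&new_env, nullptr);
      // Another thread could now MonitorEnter(global_ref) without blocking, which is
      // what the ASSERT_FALSE(IsLocked(...)) above verifies via the object's LockWord.
    }
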
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4b2ac20..e133847 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -154,8 +154,10 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
@@ -164,9 +166,11 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -239,15 +243,16 @@
   std::string error_detail;
   CheckNonOverlapping(expected, limit, &error_detail);
 
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
@@ -379,7 +384,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -414,15 +420,17 @@
                                                            page_aligned_offset,
                                                            low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
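
All of the mem_map.cc error paths above now tolerate a null error_msg, so callers that expect a mapping attempt to fail can skip building the message and, more importantly, skip reading and printing /proc/self/maps (which the new mem_map.h comment notes can take several milliseconds). The guarded-diagnostics pattern in isolation, with a hypothetical DumpMapsToLog() standing in for PrintFileToLog("/proc/self/maps", ...):

    #include <cstdio>
    #include <string>

    bool MapAtAddressSketch(void* requested, void* actual, std::string* error_msg) {
      if (actual == requested) {
        return true;
      }
      // Only pay for diagnostics when the caller actually wants them.
      if (error_msg != nullptr) {
        char buf[128];
        std::snprintf(buf, sizeof(buf),
                      "Failed to map at %p, got %p. See process maps in the log.",
                      requested, actual);
        *error_msg = buf;
        // DumpMapsToLog();  // hypothetical stand-in for the /proc/self/maps dump
      }
      return false;
    }
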
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index a67a925..efce09a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -99,11 +99,12 @@
                             error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null then we do not print /proc/maps to the log if
+  // MapFileAtAddress fails. This helps improve performance of the fail case since reading and
+  // printing /proc/maps takes several milliseconds in the worst case.
   //
   // On success, returns a MemMap instance.  On failure, returns null.
   static MemMap* MapFileAtAddress(uint8_t* addr,
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 680f4ac..83e594b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -52,10 +52,10 @@
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +64,140 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and descendants:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable)
+// 2) Load() to try to open the file.
+// 3) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 4) PreSetup() for any steps that should be done before the final setup.
+// 5) Setup() to complete the procedure.
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +220,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -630,6 +422,486 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
+static bool RegisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
+    }
+    return false;
+  }
+  return true;
+}
+
+static void UnregisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
+    }
+  }
+}
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        first_oat_(RegisterOatFileLocation(filename)) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    UnregisterOatFileLocation(GetLocation());
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // Track the registration status (= was this the first oat file) for the location.
+  const bool first_oat_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
+    // stored in the first_oat_ flag.
+    if (!first_oat_) {
+      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr syscall is missing.  There is a similar API on OSX,
+  // but let's fall back to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef __ANDROID__
+    android_dlextinfo extinfo;
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
+                                                                //   (open oat files multiple
+                                                                //    times).
+                    ANDROID_DLEXT_FORCE_FIXED_VADDR;            // Take a non-zero vaddr as absolute
+                                                                //   (non-pic boot image).
+    if (oat_file_begin != nullptr) {                            //
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;     // Use the requested addr if
+      extinfo.reserved_addr = oat_file_begin;                   // vaddr = 0.
+    }                                                           //   (pic boot image).
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    UNUSED(oat_file_begin);
+#endif
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+  } context = { Begin(), &dlopen_mmaps_ };
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+  }
+#endif
+}
+
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   std::string* error_msg);
+
+ private:
+  // Backing memory map for oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file, oat_file_begin, writable, executable, error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      std::string* error_msg) {
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             std::string* error_msg) {
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+  std::unique_ptr<OatFile> ret;
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 false,
+                                                                 executable,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+  }
+
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  //
+  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
+  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                false,
+                                                                executable,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
+
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 true,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 false,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
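
The refactoring above replaces OatFile's two ad-hoc open paths with an OatFileBase that pins down the load sequence sketched in the note near the top of this file's diff (Load, then ComputeFields, then PreSetup, then Setup), while DlOpenOatFile and ElfOatFile supply only the parts that differ: how the file is loaded, how oatdata/oatlastword/oatbss are resolved, and any pre-setup work such as registering the dlopen'd regions with the MemMap wrapper. A condensed, self-contained sketch of that template-method shape, with stand-in names rather than the real OatFile API:

    #include <memory>
    #include <string>

    // Stand-in interface for what each concrete loader must provide.
    class OatLoaderSketch {
     public:
      virtual ~OatLoaderSketch() {}
      virtual bool Load(const std::string& path, std::string* error_msg) = 0;           // dlopen or ElfFile
      virtual bool ComputeFields(const std::string& path, std::string* error_msg) = 0;  // oatdata/oatlastword
      virtual void PreSetup(const std::string& path) = 0;                               // e.g. record dlopen mmaps
      virtual bool Setup(std::string* error_msg) = 0;                                   // shared validation
    };

    // The fixed sequence that OatFileBase::OpenOatFile enforces; only the virtual
    // steps differ between the dlopen-backed and ElfFile-backed loaders.
    std::unique_ptr<OatLoaderSketch> OpenSketch(std::unique_ptr<OatLoaderSketch> loader,
                                                const std::string& path,
                                                std::string* error_msg) {
      if (!loader->Load(path, error_msg)) return nullptr;
      if (!loader->ComputeFields(path, error_msg)) return nullptr;
      loader->PreSetup(path);
      if (!loader->Setup(error_msg)) return nullptr;
      return loader;
    }

OatFile::Open then runs this sequence at most twice: first with the dlopen-backed loader (which fails fast when dlopen is disabled, writable or non-executable loading is requested, or the location was already opened on the host), then with the internal ElfFile loader.
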
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 0a77654..dbd7541 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,7 +40,7 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
 
@@ -74,7 +74,7 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
@@ -85,12 +85,6 @@
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
-
   const std::string& GetLocation() const {
     return location_;
   }
@@ -260,35 +254,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +278,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -356,6 +313,7 @@
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -426,6 +384,7 @@
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
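The header change above turns OatFile into a base class: the constructor becomes protected, the destructor virtual, and the loading machinery moves behind OatFileBase (see OatFileBase::OpenOatFile<ElfOatFile> in the oat_file.cc hunk). Below is a simplified, standalone sketch of the resulting shape; everything except the names OatFile, OatFileBase and ElfOatFile is a placeholder, the mention of a dlopen-based sibling is an assumption, and none of the real ART members are shown.

    #include <memory>
    #include <string>

    class OatFile {
     public:
      virtual ~OatFile() {}
      const std::string& GetLocation() const { return location_; }
      bool IsExecutable() const { return is_executable_; }
     protected:
      OatFile(const std::string& location, bool is_executable)
          : location_(location), is_executable_(is_executable) {}
     private:
      const std::string location_;
      const bool is_executable_;
    };

    // Shared open/setup logic that used to live directly in OatFile.
    class OatFileBase : public OatFile {
     protected:
      OatFileBase(const std::string& location, bool is_executable)
          : OatFile(location, is_executable) {}
    };

    // One concrete loader; the real change also has a dlopen-backed counterpart.
    class ElfOatFile : public OatFileBase {
     public:
      ElfOatFile(const std::string& location, bool executable)
          : OatFileBase(location, executable) {}
    };

    int main() {
      std::unique_ptr<OatFile> oat(new ElfOatFile("/data/app/example.oat", /* executable */ true));
      return oat->IsExecutable() ? 0 : 1;  // destroyed through the virtual destructor
    }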
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 585c7c4..7f4519c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -559,7 +559,9 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit, and the growth limit should always be <= the heap size.
+  if (args.GetOrDefault(M::HeapGrowthLimit) <= 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
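A minimal standalone illustration of the new growth-limit rule: plain size_t values stand in for the RuntimeArgumentMap accessors used above, and the function name is made up for this sketch.

    #include <cassert>
    #include <cstddef>

    // 0 means "no growth limit"; a limit above the maximum heap size is meaningless,
    // so both cases fall back to the maximum heap size.
    size_t EffectiveHeapGrowthLimit(size_t growth_limit, size_t heap_max) {
      if (growth_limit == 0u || growth_limit > heap_max) {
        return heap_max;
      }
      return growth_limit;
    }

    int main() {
      const size_t kMax = 512u * 1024 * 1024;                        // 512 MiB heap
      assert(EffectiveHeapGrowthLimit(0u, kMax) == kMax);            // no limit given
      assert(EffectiveHeapGrowthLimit(kMax * 2, kMax) == kMax);      // limit too large
      assert(EffectiveHeapGrowthLimit(kMax / 4, kMax) == kMax / 4);  // sane limit kept
      return 0;
    }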
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1552318..9cb37ee 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -283,7 +283,12 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -356,6 +361,7 @@
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
       }
       return true;
     }
@@ -586,6 +592,7 @@
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -614,6 +621,14 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
+  // Compiled code made an explicit deoptimization. Transfer the method
+  // to the interpreter and clear its counter so the JIT can compile it again.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  deopt_method->ClearCounter();
+  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+      deopt_method, GetQuickToInterpreterBridge());
+
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
   #ifdef __LP64__
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 931e581..fe8eb0d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -775,7 +775,7 @@
   std::unique_ptr<const OatFile> oat_file(
       OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
   if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
+    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 63e6326..90539b4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -164,14 +164,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +195,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +229,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
 
@@ -2546,7 +2561,8 @@
     if (is_deoptimization) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
     }
   }
   // Don't leave exception visible while we try to find the handler, which may cause class
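A standalone, simplified mirror of the per-thread deoptimization-context stack after this change, showing how the new from_code flag travels with the saved return value and pending exception. ART types are replaced with placeholders (FakeThrowable, FakeThread, an int64_t in place of JValue); this is a sketch, not the runtime code.

    #include <cassert>
    #include <cstdint>

    struct FakeThrowable {};  // stand-in for mirror::Throwable*

    // Simplified record: one node per nested deoptimization, linked into a stack.
    struct DeoptContextRecord {
      int64_t ret_val;                  // stand-in for JValue
      bool is_reference;
      bool from_code;                   // new: set when compiled code explicitly deoptimized
      FakeThrowable* pending_exception;
      DeoptContextRecord* link;
    };

    struct FakeThread {
      DeoptContextRecord* deopt_stack = nullptr;

      void PushDeoptimizationContext(int64_t ret, bool is_ref, bool from_code,
                                     FakeThrowable* exception) {
        deopt_stack = new DeoptContextRecord{ret, is_ref, from_code, exception, deopt_stack};
      }

      void PopDeoptimizationContext(int64_t* ret, FakeThrowable** exception, bool* from_code) {
        DeoptContextRecord* record = deopt_stack;
        deopt_stack = record->link;
        *ret = record->ret_val;
        *exception = record->pending_exception;
        *from_code = record->from_code;  // callers now learn how the deopt was requested
        delete record;
      }
    };

    int main() {
      FakeThread t;
      t.PushDeoptimizationContext(42, /* is_reference */ false, /* from_code */ true, nullptr);
      int64_t ret; FakeThrowable* ex; bool from_code;
      t.PopDeoptimizationContext(&ret, &ex, &from_code);
      assert(ret == 42 && ex == nullptr && from_code);
      return 0;
    }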
diff --git a/runtime/thread.h b/runtime/thread.h
index 4624f27..3abb3cf 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -849,10 +849,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly requested from
+  // compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 5a11698..a2d6363 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -178,8 +178,8 @@
   return static_cast<int32_t>(hash);
 }
 
-size_t ComputeModifiedUtf8Hash(const char* chars) {
-  size_t hash = 0;
+uint32_t ComputeModifiedUtf8Hash(const char* chars) {
+  uint32_t hash = 0;
   while (*chars != '\0') {
     hash = hash * 31 + *chars++;
   }
diff --git a/runtime/utf.h b/runtime/utf.h
index 03158c4..4abd605 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -85,8 +85,8 @@
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
 // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a
-// size_t and hashes individual chars instead of codepoint words.
-size_t ComputeModifiedUtf8Hash(const char* chars);
+// uint32_t and hashes individual chars instead of codepoint words.
+uint32_t ComputeModifiedUtf8Hash(const char* chars);
 
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
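The hash itself is unchanged by the hunk above; only the accumulator and return type narrow from size_t to uint32_t, presumably so long strings hash identically on 32- and 64-bit builds. A standalone sketch with a worked value (the local name ModifiedUtf8Hash mirrors ComputeModifiedUtf8Hash for illustration only):

    #include <cassert>
    #include <cstdint>

    // Same recurrence as ComputeModifiedUtf8Hash: hash = hash * 31 + *chars++,
    // accumulated in a uint32_t.
    uint32_t ModifiedUtf8Hash(const char* chars) {
      uint32_t hash = 0;
      while (*chars != '\0') {
        hash = hash * 31 + *chars++;
      }
      return hash;
    }

    int main() {
      // "ab": 0 * 31 + 'a' (97) = 97, then 97 * 31 + 'b' (98) = 3105.
      assert(ModifiedUtf8Hash("ab") == 3105u);
      return 0;
    }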
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index c932761..0b87a4f 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -31,9 +31,9 @@
 30 (class java.lang.Integer)
 62 (class java.lang.Long)
 14 (class java.lang.Short)
-[public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private 
java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
+[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char java.lang.String.REPLACEMENT_CHAR, private static final char[] java.lang.String.ASCII, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private char java.lang.String.foldCase(char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String 
java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public native [C java.lang.String.toCharArray(), public native char java.lang.String.charAt(int), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index bf3a574..1245852 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -157,10 +157,28 @@
     System.out.println(o + " (" + (o != null ? o.getClass() : "null") + ")");
   }
 
+  /**
+   * Sorts the input array in place using the given comparator and returns it.
+   */
+  private static Object[] sort(Object[] objects, Comparator<Object> comp) {
+    Arrays.sort(objects, comp);
+    return objects;
+  }
+
   public static void testMethodReflection() throws Exception {
-    System.out.println(Arrays.toString(String.class.getDeclaredConstructors()));
-    System.out.println(Arrays.toString(String.class.getDeclaredFields()));
-    System.out.println(Arrays.toString(String.class.getDeclaredMethods()));
+    Comparator<Object> comp = new Comparator<Object>() {
+      public int compare(Object a, Object b) {
+        return a.toString().compareTo(b.toString());
+      }
+      public boolean equals(Object b) {
+        return this == b;
+      }
+    };
+
+    // Sort the return values by their string values since the order is undefined by the spec.
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredConstructors(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredFields(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredMethods(), comp)));
 
     System.out.println(Arrays.toString(Main.class.getInterfaces()));
     System.out.println(Arrays.toString(String.class.getInterfaces()));
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index ffeae7d..c3d2759 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -652,20 +652,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -685,27 +684,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -725,25 +722,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -770,18 +767,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -816,14 +814,18 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -831,8 +833,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Several HDeoptimize will be added. Two for each index.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -859,7 +861,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -874,23 +875,27 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
+    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -914,20 +919,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -955,26 +959,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop:
+    // two for the index and one for the null check on matrix.
+    // The same applies to the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -994,15 +1000,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  //  Simplification removes the redundant check.
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Three HDeoptimize will be added: two for the index
+    // and one for the null check on array.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index ec63057..f1f80ca 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -618,6 +618,57 @@
     getSuper();
   }
 
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..5080f142 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -19,23 +19,25 @@
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeVirtual
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a8b6307 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
   /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 58c92f1..3f6e48b 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -29,6 +29,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linear(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -41,6 +42,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDown(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -53,6 +55,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearObscure(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -66,6 +69,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearVeryObscure(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -79,6 +83,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWhile(int[] x) {
     int i = 0;
     int result = 0;
@@ -92,6 +97,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearThreeWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -108,6 +114,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearFourWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -128,6 +135,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinear(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -143,6 +151,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -162,6 +171,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearWithParameter(int n) {
     int[] x = new int[n];
     for (int i = 0; i < n; i++) {
@@ -174,6 +184,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearCopy(int x[]) {
     int n = x.length;
     int y[] = new int[n];
@@ -183,10 +194,55 @@
     return y;
   }
 
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i+=2) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithCompoundStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
     int result = 0;
@@ -202,6 +258,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -218,6 +275,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -234,6 +292,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -250,6 +309,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -266,6 +326,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -279,6 +340,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -292,6 +354,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -306,6 +369,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -320,6 +384,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -330,10 +395,160 @@
     return result;
   }
 
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+       result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnTwoArrayLengths(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < b.length; j++) {
+        // Need to know j < b.length < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < b.length for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnOneArrayLength(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnParameter(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < n for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  // Verifier for triangular methods.
+  private static void verifyTriangular(int[] a, int[] b, int m, int n) {
+    expectEquals(n, a.length);
+    for (int i = 0, k = m; i < n; i++) {
+      expectEquals(a[i], k);
+      if (k > 0) k--;
+    }
+    expectEquals(m, b.length);
+    for (int i = 0; i < m; i++) {
+      expectEquals(b[i], 1);
+    }
+  }
+
+  /// CHECK-START: void Main.bubble(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.bubble(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void bubble(int[] a) {
+    for (int i = a.length; --i >= 0;) {
+      for (int j = 0; j < i; j++) {
+        if (a[j] > a[j+1]) {
+          int tmp = a[j];
+          a[j]  = a[j+1];
+          a[j+1] = tmp;
+        }
+      }
+    }
+  }
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicIdiom(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -350,6 +565,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence2(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -372,6 +588,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence4(int tc) {
     int[] x = { 1, 3, 5, 7 };
     // Loop with periodic sequence (0, 1, 2, 3).
@@ -395,6 +612,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -408,6 +626,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -421,6 +640,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -434,6 +654,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -448,6 +669,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -461,6 +683,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -474,6 +697,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -487,6 +711,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -501,6 +726,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
@@ -511,6 +737,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
@@ -521,6 +748,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
@@ -533,6 +761,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
@@ -541,6 +770,306 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = max int. Since i is also used
+    // as a subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the index is not used as a subscript,
+    // so dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: ArrayGet
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test the length of array a before the loop so that only the bounds
+    // checks on constant subscripts remain, making them viable candidates for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them candidates for deoptimization based on constant indices.
+        // The compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[0][i] = 1;
+        a[1][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
+                                                  boolean[] r,
+                                                  byte[] s,
+                                                  char[] t,
+                                                  short[] u,
+                                                  int[] v,
+                                                  long[] w,
+                                                  float[] x,
+                                                  double[] y,
+                                                  Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+    }
+    return result;
+  }
+
   //
   // Verifier.
   //
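
Note on the dynamic BCE tests added above: the three CHECK-DAG: Deoptimize lines per method correspond to the guards that replace the per-iteration NullCheck, ArrayLength and BoundsCheck. A minimal Java sketch of the intended post-BCE shape of linearDynamicBCE1, assuming the guards test nullness and the [lo, hi) range against x.length (the sketch and its names are illustrative, not the compiler's actual output):

// Hedged sketch only: roughly the shape linearDynamicBCE1 is expected to take
// after dynamic BCE. The condition below stands in for the three Deoptimize
// guards; in real code a failing guard deoptimizes to the interpreter, which
// keeps every per-iteration check and still throws at exactly the right index.
static int linearDynamicBCE1Sketch(int[] x, int lo, int hi) {
  int result = 0;
  if (x != null && lo >= 0 && hi <= x.length) {    // stand-in for the Deoptimize guards
    for (int i = lo; i < hi; i++) {
      sResult += x[i];                             // no NullCheck/BoundsCheck left in the loop
    }
  } else {
    for (int i = lo; i < hi; i++) {                // "deoptimized" slow path, checks stay implicit
      sResult += x[i];                             // (sResult is the test's static accumulator)
    }
  }
  return result;
}

By contrast, noDynamicBCEPossiblyInfiniteLoop and noDynamicBCEMixedInductionTypes keep their NullCheck/ArrayLength/BoundsCheck because no finite, int-typed range for the subscript can be derived from the loop condition, so no such guard set exists.
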
@@ -596,6 +1125,9 @@
     }
 
     // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
     expectEquals(66, linearWithVeryLargePositiveStride());
@@ -608,6 +1140,17 @@
     expectEquals(55, linearDoWhileUp());
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
+    expectEquals(55, invariantFromPreLoop(x, 1));
+    linearTriangularOnTwoArrayLengths(10);
+    linearTriangularOnOneArrayLength(10);
+    linearTriangularOnParameter(10);
+
+    // Sorting.
+    int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
+    bubble(sort);
+    for (int i = 0; i < 10; i++) {
+      expectEquals(sort[i], x[i]);
+    }
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
@@ -690,6 +1233,86 @@
       sResult += 1000;
     }
     expectEquals(1055, sResult);
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[2][10];
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(a[0][1], 1);
+    expectEquals(a[1][1], 2);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    Integer[] x9 = { 9 };
+    expectEquals(505,
+        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
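
One subtlety in the new a = new int[2][10] case: the guard covering the constant indices 0, 1 and 99 cannot hold (a.length is only 2), so the loop body runs on the checked path, and source order guarantees that the stores preceding the faulting access still land before the exception escapes. A self-contained sketch of the behaviour the harness then asserts (illustrative method name):

// Illustrative only: the checked ordering the a = new int[2][10] case relies on.
static void constantIndexOrderSketch() {
  int[][] a = new int[2][10];
  try {
    a[0][1] = 1;   // executes
    a[1][1] = 2;   // executes
    a[99][1] = 3;  // throws ArrayIndexOutOfBoundsException: 99 >= a.length (2)
  } catch (ArrayIndexOutOfBoundsException e) {
    // The same ordering must hold after deoptimization, which is why main()
    // can assert a[0][1] == 1 and a[1][1] == 2 once the exception is caught.
  }
}
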
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..3f65d5a 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -35,7 +35,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
@@ -56,7 +56,7 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      InvokeVirtual
   /// CHECK:      test
   /// CHECK:      jz/eq
   // Check that we don't try to compare the classes.
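
These hunks only retarget the checker expectations from InvokeStaticOrDirect to InvokeVirtual; the test bodies themselves are unchanged by the patch. For orientation, a hedged reconstruction of the kind of call the first check inspects (the field name myObject is taken from the visible context; the real body of stringEqualsNull may differ in detail):

// Illustrative reconstruction only: a String.equals call handled as an
// intrinsic. After this change the checker expects it to show up in the
// inspected pass output as an InvokeVirtual, not an InvokeStaticOrDirect.
static Object myObject = "hello";   // hypothetical initializer; field name from the diff context
static boolean stringEqualsNullSketch() {
  String o = (String) myObject;
  return o.equals(null);            // single virtual invoke whose result is returned directly
}
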
diff --git a/test/551-invoke-super/expected.txt b/test/551-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
diff --git a/test/552-checker-sharpening/expected.txt b/test/552-checker-sharpening/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..d50edd8
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use a PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+  }
+}
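
All of the fixup expectations in this new test describe the same shape: the PC-relative base (ArmDexCacheArraysBase / X86ComputeBaseMethodAddress) is materialized exactly once, in a block that dominates every pc-relative InvokeStaticOrDirect, pulled out of loops and diamonds but never above an If that may skip the calls entirely. A rough Java-level analogy of testLoopWithDiamond, using made-up helpers since the real base is an architecture-specific HInstruction rather than a Java value:

// Rough analogy only, with hypothetical helpers: the fixup passes behave like
// loop-invariant hoisting of the "base" that every sharpened call reads through.
final class FixupAnalogy {
  static long computeBaseOnce() { return 0x1000L; }            // stand-in for the PC-relative base
  static int callViaBase(long base, int arg) { return -arg; }  // stand-in for the sharpened invoke

  static int testLoopWithDiamondAnalogy(int[] array, int x) {
    if (array != null) {                  // the base is not hoisted above this If
      long base = computeBaseOnce();      // computed once, before the loop
      for (int i : array) {
        x += callViaBase(base, i);        // every call in the loop reuses the same base
      }
    }
    return x;
  }
}
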
diff --git a/test/552-invoke-non-existent-super/expected.txt b/test/552-invoke-non-existent-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
diff --git a/test/553-invoke-super/expected.txt b/test/553-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on this class, to confuse runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
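
For reference, the rule test 553 pins down: invoke-super must bind to the superclass implementation even though the method reference in the smali names the subclass itself and the subclass provides an override returning 777, hence the expected value of 42. A plain-Java equivalent of the dispatch rule (illustrative class names; Java cannot express the "reference names the current class" twist, which is why the test body is written in smali):

// Illustrative only: ordinary Java super dispatch, mirroring the 42-vs-777
// split between SuperClass.$noinline$returnInt() and the smali override.
class Base {
  int returnInt() { return 42; }
}
class Derived extends Base {
  @Override int returnInt() { return 777; }
  int callSuper() { return super.returnInt(); }  // must yield 42, never 777
}
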
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c830ad4..0925d36 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -258,8 +258,10 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 47fc50f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -128,7 +128,7 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test