Merge "MIPS32: java.lang.*.reverse"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 564bd7e..0ed843b 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -69,6 +69,7 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 3ca518b..4c66c76 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -20,18 +20,24 @@
 
 namespace art {
 
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
   }
@@ -42,6 +48,10 @@
 }
 
 bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
   bool success = true;
   if (used_ > 0) {
     success = out_->WriteFully(&buffer_[0], used_);
@@ -51,7 +61,7 @@
 }
 
 off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
+  if (!FlushBuffer()) {
     return -1;
   }
   return out_->Seek(offset, whence);
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index b447f41..1da3a69 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
 #define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
 
+#include <memory>
+
 #include "output_stream.h"
 
 #include "globals.h"
@@ -25,26 +27,23 @@
 
 class BufferedOutputStream FINAL : public OutputStream {
  public:
-  explicit BufferedOutputStream(OutputStream* out);
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
 
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
+  ~BufferedOutputStream() OVERRIDE;
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
 
-  bool Flush();
+  bool Flush() OVERRIDE;
 
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  OutputStream* const out_;
+  bool FlushBuffer();
 
+  std::unique_ptr<OutputStream> const out_;
   uint8_t buffer_[kBufferSize];
-
   size_t used_;
 
   DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
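
The behavioral split above: the new private FlushBuffer() only drains the 8 KB in-memory buffer into the wrapped stream, while the overridden Flush() additionally flushes the underlying OutputStream, and the wrapped stream is now owned via std::unique_ptr instead of a raw pointer deleted in the destructor. A minimal usage sketch under those semantics; the function name and the file/data/size parameters are illustrative, not taken from this change:

void WriteBlob(File* file, const uint8_t* data, size_t size) {
  // The wrapped FileOutputStream is owned by the BufferedOutputStream from here on.
  std::unique_ptr<OutputStream> out(
      new BufferedOutputStream(MakeUnique<FileOutputStream>(file)));
  out->WriteFully(data, size);  // Buffered while the write fits in the 8 KB buffer.
  out->Flush();                 // FlushBuffer() plus Flush() of the underlying stream.
  // The destructor only calls FlushBuffer(), so callers that need the bytes pushed
  // down to the file must call Flush() explicitly.
}
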
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d67087e..9d3af16 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -77,9 +76,6 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 // given, to all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
@@ -2514,19 +2510,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_present_) {
     return false;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d90d610..1347b37 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -59,7 +59,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -398,12 +397,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
new file mode 100644
index 0000000..a391e4d
--- /dev/null
+++ b/compiler/dwarf/method_debug_info.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+
+#include "dex_file.h"
+
+namespace art {
+class CompiledMethod;
+namespace dwarf {
+
+struct MethodDebugInfo {
+  const DexFile* dex_file_;
+  size_t class_def_index_;
+  uint32_t dex_method_index_;
+  uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+  bool deduped_;
+  uint32_t low_pc_;
+  uint32_t high_pc_;
+  CompiledMethod* compiled_method_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
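
This struct is the old OatWriter::DebugInfo moved out of oat_writer.h (see the removal further down), so the DWARF and ELF writers no longer have to include oat_writer.h. A minimal sketch of the producer/consumer pattern used throughout this change; the variable values are placeholders:

// OatWriter fills one entry per compiled method (see oat_writer.cc below).
std::vector<dwarf::MethodDebugInfo> method_info;
dwarf::MethodDebugInfo info = {};        // Zero-initialize all fields.
info.dex_file_ = dex_file;               // Placeholder inputs.
info.class_def_index_ = class_def_index;
info.dex_method_index_ = method_index;
info.low_pc_ = code_start;
info.high_pc_ = code_start + code_size;
info.compiled_method_ = compiled_method;
method_info.push_back(info);
// The ELF/DWARF writers only receive a read-only view of the vector:
ArrayRef<const dwarf::MethodDebugInfo> infos(method_info);
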
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 6e8dfd6..c19bc3d 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -27,6 +27,7 @@
 #include "elf_utils.h"
 #include "file_output_stream.h"
 #include "leb128.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -100,7 +101,7 @@
       header_.sh_entsize = entsize;
     }
 
-    virtual ~Section() {
+    ~Section() OVERRIDE {
       if (started_) {
         CHECK(finished_);
       }
@@ -187,6 +188,12 @@
       return owner_->Seek(offset, whence);
     }
 
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->Flush();
+    }
+
     Elf_Word GetSectionIndex() const {
       DCHECK(started_);
       DCHECK_NE(section_index_, 0u);
@@ -312,7 +319,7 @@
 
   // Encode patch locations as LEB128 list of deltas between consecutive addresses.
   // (exposed publicly for tests)
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
                                std::vector<uint8_t>* buffer) {
     buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
     uintptr_t address = 0;  // relative to start of section.
@@ -323,9 +330,9 @@
     }
   }
 
-  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
     std::vector<uint8_t> buffer;
-    EncodeOatPatches(*patch_locations, &buffer);
+    EncodeOatPatches(patch_locations, &buffer);
     std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
     s->Start();
     s->WriteFully(buffer.data(), buffer.size());
@@ -385,6 +392,7 @@
     Seek(0, kSeekSet);
     WriteFully(&elf_header, sizeof(elf_header));
     WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    Flush();
   }
 
   // The running program does not have access to section headers
@@ -509,6 +517,13 @@
     return new_offset;
   }
 
+  bool Flush() {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
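
EncodeOatPatches now takes an ArrayRef<const uintptr_t> view rather than a concrete std::vector, but the encoding itself is unchanged: each patch location is written as the ULEB128-encoded delta from the previous one. A small worked sketch with illustrative addresses; the call form matches the updated elf_writer_test.cc further down:

std::vector<uintptr_t> locations = {0x1000, 0x1008, 0x1010};  // Illustrative addresses.
std::vector<uint8_t> oat_patches;
ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(locations), &oat_patches);
// Deltas are 0x1000, 0x8, 0x8, so the ULEB128 output bytes are:
//   0x80 0x20   (delta 0x1000)
//   0x08        (delta 0x8)
//   0x08        (delta 0x8)
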
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..357d5f6 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
 
 class ElfWriter {
  public:
@@ -46,21 +49,21 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void SetBssSize(size_t bss_size) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+ protected:
+  ElfWriter() = default;
 };
 
 }  // namespace art
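
The base class is now a pure streaming interface with no knowledge of OatWriter; callers drive the write in a fixed order. A condensed sketch of that order, mirroring the oat_test.cc and image_test.cc updates later in this change (isa, compiler_options, file and oat_writer stand in for the caller's objects, and error checks are elided):

std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(isa, &compiler_options, file);
elf_writer->Start();
OutputStream* rodata = elf_writer->StartRoData();
oat_writer.WriteRodata(rodata);     // Returns false on failure.
elf_writer->EndRoData(rodata);
OutputStream* text = elf_writer->StartText();
oat_writer.WriteCode(text);
elf_writer->EndText(text);
elf_writer->SetBssSize(oat_writer.GetBssSize());
elf_writer->WriteDynamicSection();
elf_writer->WriteDebugInfo(ArrayRef<const dwarf::MethodDebugInfo>(oat_writer.GetMethodDebugInfo()));
elf_writer->WritePatchLocations(ArrayRef<const uintptr_t>(oat_writer.GetAbsolutePatchLocations()));
bool success = elf_writer->End();
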
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 81f574f..9dc6565 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -17,6 +17,7 @@
 #include "elf_writer_debug.h"
 
 #include <unordered_set>
+#include <vector>
 
 #include "base/casts.h"
 #include "base/stl_util.h"
@@ -25,6 +26,7 @@
 #include "dex_file-inl.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
+#include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
 #include "oat_writer.h"
@@ -203,7 +205,7 @@
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
   CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
         format == dwarf::DW_EH_FRAME_FORMAT);
@@ -233,7 +235,7 @@
     cfi_section->WriteFully(buffer.data(), buffer.size());
     buffer_address += buffer.size();
     buffer.clear();
-    for (const OatWriter::DebugInfo& mi : method_infos) {
+    for (const MethodDebugInfo& mi : method_infos) {
       if (!mi.deduped_) {  // Only one FDE per unique address.
         ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
         if (!opcodes.empty()) {
@@ -286,12 +288,13 @@
     header_section->WriteFully(binary_search_table.data(), binary_search_table.size());
     header_section->End();
   } else {
-    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
+    builder->WritePatches(".debug_frame.oat_patches",
+                          ArrayRef<const uintptr_t>(patch_locations));
   }
 }
 
 struct CompilationUnit {
-  std::vector<const OatWriter::DebugInfo*> methods_;
+  std::vector<const MethodDebugInfo*> methods_;
   size_t debug_line_offset_ = 0;
   uint32_t low_pc_ = 0xFFFFFFFFU;
   uint32_t high_pc_ = 0;
@@ -417,7 +420,7 @@
     // Write table into .debug_loc which describes location of dex register.
     // The dex register might be valid only at some points and it might
     // move between machine registers and stack.
-    void WriteRegLocation(const OatWriter::DebugInfo* method_info, uint16_t vreg,
+    void WriteRegLocation(const MethodDebugInfo* method_info, uint16_t vreg,
                           bool is64bitValue, uint32_t compilation_unit_low_pc) {
       using Kind = DexRegisterLocation::Kind;
       bool is_optimizing = method_info->compiled_method_->GetQuickCode().size() > 0 &&
@@ -740,7 +743,8 @@
 
   void End() {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_);
+    builder_->WritePatches(".debug_info.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_info_patches_));
     builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
     builder_->WriteSection(".debug_str", &debug_str_.Data());
     builder_->WriteSection(".debug_loc", &debug_loc_);
@@ -807,7 +811,7 @@
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
-    for (const OatWriter::DebugInfo* mi : compilation_unit.methods_) {
+    for (const MethodDebugInfo* mi : compilation_unit.methods_) {
       // Ignore function if we have already generated line table for the same address.
       // It would confuse the debugger and the DWARF specification forbids it.
       if (mi->deduped_) {
@@ -924,7 +928,8 @@
 
   void End() {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches", &debug_line_patches);
+    builder_->WritePatches(".debug_line.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_line_patches));
   }
 
  private:
@@ -934,11 +939,11 @@
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos) {
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
+  for (const MethodDebugInfo& mi : method_infos) {
     auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
     const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
     if (compilation_units.empty() || source_file != last_source_file) {
@@ -975,18 +980,18 @@
 // Explicit instantiations
 template void WriteCFISection<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteCFISection<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteDebugSections<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 template void WriteDebugSections<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e58fd0a..9ed102f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,23 +17,22 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include <vector>
-
 #include "elf_builder.h"
 #include "dwarf/dwarf_constants.h"
 #include "oat_writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format);
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos);
+                        const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 5c059e1..9da2af8 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,16 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
 #include "utils.h"
 
 namespace art {
@@ -57,125 +54,186 @@
 constexpr bool kGenerateSingleArmMappingSymbol = true;
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+class ElfWriterQuick FINAL : public ElfWriter {
+ public:
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
+
+  void Start() OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void SetBssSize(size_t bss_size) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
+
+ private:
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
+};
+
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
+  }
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
-  std::unique_ptr<BufferedOutputStream> output_stream(
-      new BufferedOutputStream(new FileOutputStream(elf_file_)));
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(
-      new ElfBuilder<ElfTypes>(isa, output_stream.get()));
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
 
-  builder->Start();
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
 
-  auto* rodata = builder->GetRoData();
-  auto* text = builder->GetText();
-  auto* bss = builder->GetBss();
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
 
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
   rodata->Start();
-  if (!oat_writer->WriteRodata(rodata)) {
-    return false;
-  }
-  rodata->End();
+  return rodata;
+}
 
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
   text->Start();
-  if (!oat_writer->WriteCode(text)) {
-    return false;
-  }
-  text->End();
+  return text;
+}
 
-  if (oat_writer->GetBssSize() != 0) {
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
+  auto* bss = builder_->GetBss();
+  if (bss_size != 0u) {
     bss->Start();
-    bss->SetSize(oat_writer->GetBssSize());
+    bss->SetSize(bss_size);
     bss->End();
   }
-
-  builder->WriteDynamicSection(elf_file_->GetPath());
-
-  if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-    const auto& method_infos = oat_writer->GetMethodDebugInfo();
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder.get(), method_infos);
-    }
-  }
-
-  // Add relocation section for .text.
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
-  }
-
-  builder->End();
-
-  return builder->Good() && output_stream->Flush();
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  builder_->WriteDynamicSection(elf_file_->GetPath());
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    if (!method_infos.empty()) {
+      // Add methods to .symtab.
+      WriteDebugSymbols(builder_.get(), method_infos);
+      // Generate CFI (stack unwinding information).
+      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
+      // Write DWARF .debug_* sections.
+      dwarf::WriteDebugSections(builder_.get(), method_infos);
+    }
+  }
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
+  // Add relocation section for .text.
+  if (compiler_options_->GetIncludePatchInformation()) {
+    // Note that ElfWriter::Fixup will be called regardless and therefore
+    // we need to include oat_patches for debug sections unconditionally.
+    builder_->WritePatches(".text.oat_patches", patch_locations);
+  }
+}
+
+template <typename ElfTypes>
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
+
+  return builder_->Good();
+}
+
+template <typename ElfTypes>
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   bool generated_mapping_symbol = false;
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
 
-  if (method_info.empty()) {
+  if (method_infos.empty()) {
     return;
   }
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of a method is not marked deduped_, but the rest are.
   std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
     }
   }
 
   strtab->Start();
   strtab->Write("");  // strtab should start with empty string.
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
     }
 
-    uint32_t low_pc = it->low_pc_;
+    uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
+    low_pc += info.compiled_method_->CodeDelta();
     symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
                     true, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..347d372 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,19 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index b413a9e..7cf774e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -101,7 +101,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/file_output_stream.cc b/compiler/file_output_stream.cc
index 3ee16f5..bbfbdfd 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/file_output_stream.cc
@@ -33,4 +33,8 @@
   return lseek(file_->Fd(), offset, static_cast<int>(whence));
 }
 
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
 }  // namespace art
diff --git a/compiler/file_output_stream.h b/compiler/file_output_stream.h
index 9dfbd7f..6917d83 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/file_output_stream.h
@@ -27,11 +27,13 @@
  public:
   explicit FileOutputStream(File* file);
 
-  virtual ~FileOutputStream() {}
+  ~FileOutputStream() OVERRIDE {}
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   File* const file_;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6df1527..5f4a922 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,9 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -92,12 +94,37 @@
                            /*compiling_boot_image*/true,
                            &timings,
                            &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      bool success = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(success);
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      success = elf_writer->End();
+
       ASSERT_TRUE(success);
     }
   }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 2125c9a..d001495 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -170,18 +170,6 @@
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
-  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -190,13 +178,13 @@
   }
 
   // Do the compilation.
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 030451c..c305b12 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -27,6 +27,9 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
@@ -134,11 +137,36 @@
                          /*compiling_boot_image*/false,
                          &timings,
                          &key_value_store);
-    return compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                      !kIsTargetBuild,
-                                      dex_files,
-                                      &oat_writer,
-                                      file);
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+
+    elf_writer->Start();
+
+    OutputStream* rodata = elf_writer->StartRoData();
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+    elf_writer->WriteDynamicSection();
+
+    ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+    elf_writer->WriteDebugInfo(method_infos);
+
+    ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+    elf_writer->WritePatchLocations(patch_locations);
+
+    return elf_writer->End();
   }
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 40a3f14..a6a49f9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,6 +31,7 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
@@ -485,7 +486,7 @@
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
+        writer_->method_info_.push_back(dwarf::MethodDebugInfo {
             dex_file_,
             class_def_index_,
             it.GetMemberIndex(),
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7027434..6c46ebc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -38,6 +38,10 @@
 class TimingLogger;
 class TypeLookupTable;
 
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
+
 // OatHeader         variable length with count of D OatDexFiles
 //
 // OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
@@ -129,19 +133,7 @@
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
-
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
+  const std::vector<dwarf::MethodDebugInfo>& GetMethodDebugInfo() const {
     return method_info_;
   }
 
@@ -280,7 +272,7 @@
     DISALLOW_COPY_AND_ASSIGN(OatClass);
   };
 
-  std::vector<DebugInfo> method_info_;
+  std::vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index cca0baf..a448302 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -175,6 +176,24 @@
     return false;
   }
 
+  // Returns whether it is certain that this->bound > `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns whether it is certain that this->bound < `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greatest of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
@@ -252,157 +271,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +473,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for tranforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1111,7 +499,9 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@
         first_constant_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
+        has_deoptimization_on_constant_subscripts_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@
     }
   }
 
+  void Finish() {
+    // Preserve SSA structure which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,25 +576,6 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1;
-    InductionVarRange::Value v2;
-    bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
-  }
-
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1330,17 +721,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1364,17 +744,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1400,38 +769,34 @@
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
+    bool try_dynamic_bce = true;
 
     if (!index->IsIntConstant()) {
+      // Non-constant subscript.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
       // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // Disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant subscript.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
@@ -1439,7 +804,7 @@
       }
       if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
@@ -1450,7 +815,7 @@
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1485,11 +850,11 @@
           ValueRange(GetGraph()->GetArena(), lower, upper);
       GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      TryDynamicBCE(bounds_check);
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1508,7 +873,7 @@
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1555,7 +920,7 @@
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
       IfCondition cmp = cond->GetCondition();
@@ -1564,42 +929,11 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
       }
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1613,7 +947,7 @@
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1715,19 +1049,19 @@
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1742,7 +1076,7 @@
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1766,9 +1100,12 @@
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+      return;
+    }
+    // If this instruction was added by AddCompareWithDeoptimization(), narrow
+    // the range accordingly in subsequent basic blocks.
     HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
     HInstruction* instruction = less_than_or_equal->InputAt(0);
     if (instruction->IsArrayLength()) {
@@ -1782,6 +1119,35 @@
     }
   }
 
+  /**
+    * After null/bounds checks are eliminated, some invariant array references
+    * may be exposed underneath; these can be hoisted out of the loop to the
+    * preheader or, in combination with dynamic bce, to the deoptimization block.
+    *
+    * for (int i = 0; i < n; i++) {
+    *                                <-------+
+    *   for (int j = 0; j < n; j++)          |
+    *     a[i][j] = 0;               --a[i]--+
+    * }
+    *
+    * Note: this optimization is no longer applied after deoptimization on array references
+    * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
+    * those cases it would be unsafe to hoist array references across their deoptimization
+    * instruction inside a loop.
+    */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
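+      // Hoist only if both the array reference and the index are loop invariant and the
+      // value loaded cannot be changed by the loop body (checked against its side effects).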
+      if (loop->IsLoopInvariant(array_get->InputAt(0), false) &&
+          loop->IsLoopInvariant(array_get->InputAt(1), false)) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          HoistToPreheaderOrDeoptBlock(loop, array_get);
+        }
+      }
+    }
+  }
+
   void AddCompareWithDeoptimization(HInstruction* array_length,
                                     HIntConstant* const_instr,
                                     HBasicBlock* block) {
@@ -1803,6 +1169,9 @@
     block->InsertInstructionBefore(cond, bounds_check);
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+    // Flag that this kind of deoptimization on array references with constant
+    // subscripts has occurred to prevent further hoisting of these references.
+    has_deoptimization_on_constant_subscripts_ = true;
   }
 
   void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1846,21 +1215,425 @@
     }
   }
 
+  /**
+   * Returns true if static range analysis based on induction variables can determine that the
+   * bounds check on the given array range is always satisfied by the computed index range. The
+   * output parameter try_dynamic_bce is set to false if OOB is certain.
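+   * For example, in a loop "for (int i = 0; i < a.length; i++) a[i] = 0;" the computed index
+   * range is [0, a.length - 1], which fits the array range [0, a.length - 1], so the bounds
+   * check can be removed statically (provided the loop is known to be finite).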
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HInstruction* context,
+                            HInstruction* index,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+      ValueRange index_range(GetGraph()->GetArena(),
+                             ValueBound(v1.instruction, v1.b_constant),
+                             ValueBound(v2.instruction, v2.b_constant));
+      // If analysis reveals a certain OOB, disable dynamic BCE.
+      *try_dynamic_bce = !index_range.GetLower().LessThan(array_range->GetLower()) &&
+                         !index_range.GetUpper().GreaterThan(array_range->GetUpper());
+      // Use analysis for static bce only if loop is finite.
+      return !needs_finite_test && index_range.FitsIn(array_range);
+    }
+    return false;
+  }
+
+  /**
+   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
+   * check dynamically by adding runtime tests that trigger a deoptimization in case the bounds
+   * would go out of range (we want to be rather certain of that, given the slowdown of
+   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
+   * bounds checks and related null checks removed.
+   */
+  void TryDynamicBCE(HBoundsCheck* instruction) {
+    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+    HInstruction* index = instruction->InputAt(0);
+    HInstruction* length = instruction->InputAt(1);
+    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+    bool needs_finite_test = false;
+    bool needs_taken_test = false;
+    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+        induction_range_.CanGenerateCode(
+            instruction, index, &needs_finite_test, &needs_taken_test) &&
+        CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
+      HInstruction* lower = nullptr;
+      HInstruction* upper = nullptr;
+      // Generate the following unsigned comparisons
+      //     if (lower > upper)   deoptimize;
+      //     if (upper >= length) deoptimize;
+      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+      //     if (upper >= length) deoptimize;
+      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
+      // iterations) and by not combining accesses (e.g. a[i], a[i-3], a[i+5] etc.), these tests
+      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
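+      // For instance, a negative 'lower' becomes a large unsigned value and fails the first
+      // test, while a negative or too-large 'upper' fails the second, so the pair of unsigned
+      // tests covers both ends of the index range.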
+      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
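+      // Materialize the lower and upper values of the index range as instructions in that
+      // block (lower may be left null, e.g. for a non-induction index, per the comment above).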
+      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+      if (lower != nullptr) {
+        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+      }
+      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      ReplaceInstruction(instruction, index);
+    }
+  }
+
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does the loop have early exits? If so, the full range may not be covered by the loop
+      // at runtime and testing the range may apply deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+        if (!block->Dominates(back_edge)) {
+          return false;
+        }
+      }
+      // Success!
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If the loop has already been analyzed for early exits, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // First-time early-exit analysis for this loop. Since the analysis requires scanning
+    // the full loop body, its results are stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
+
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(length, false)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreheaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsLoopInvariant(check, false)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsLoopInvariant(array, false)) {
+        // Generate: if (array == null) deoptimize;
+        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeopt(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we already forced the loop to be finite, allow dynamic bce directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction
+      // variable at this point) is the direct loop index (viz. a[i]), since then the
+      // runtime tests ensure the upper bound cannot cause an infinite loop.
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /** Inserts a deoptimization test. */
+  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Hoists instruction out of the loop to preheader or deoptimization block. */
+  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    // Use the preheader unless there is an earlier generated deoptimization block, since
+    // hoisted expressions may depend on and/or be used by the deoptimization tests.
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    HBasicBlock* preheader = loop->GetPreHeader();
+    HBasicBlock* block = preheader;
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      block = it->second;
+    }
+    // Hoist the instruction.
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
+
+  /**
+   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * The taken-test protects the range analysis evaluation code against any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   * Returns the block in which deoptimizations/invariants can be put.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop,
+                                                      bool needs_taken_test) {
+    // Not needed (can use preheader), or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test) {
+      return loop->GetPreHeader();
+    } else {
+      auto it = taken_test_loop_.find(loop_id);
+      if (it != taken_test_loop_.end()) {
+        return it->second;
+      }
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, it jumps around the deoptimization block.
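+    // The temporary goto below serves as the block's last instruction while the taken-test
+    // condition is generated; it is then removed and replaced with the real HIf.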
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+    return true_block;
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *             header
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+             !it2.Done();
+             it2.Advance()) {
+          HInstruction* user = it2.Current()->GetUser();
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, it2.Current()->GetIndex());
+          }
+        }
+        // Scan all environment uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+             !it2.Done();
+             it2.Advance()) {
+          HEnvironment* user = it2.Current()->GetUser();
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
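+    // Pick a type-appropriate zero/null constant for the input coming from the path
+    // that skips the deoptimization block.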
+    switch (type) {
+      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
+
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
   // beneficial to add a compare instruction that has deoptimization fallback and
   // eliminate those bounds checks.
   bool need_to_revisit_block_;
 
+  // Flag that denotes whether deoptimization has occurred on array references
+  // with constant subscripts (see AddCompareWithDeoptimization()).
+  bool has_deoptimization_on_constant_subscripts_;
+
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1872,14 +1645,12 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
   // that value dominated by that instruction fits in that range. Range of that
   // value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   HBasicBlock* last_visited_block = nullptr;
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
@@ -1896,6 +1667,9 @@
     visitor.VisitBasicBlock(current);
     last_visited_block = current;
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..b9df686 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
       : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
@@ -34,6 +38,7 @@
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..dbeb1cc 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index d7754e8..8e75bdc 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -735,6 +735,79 @@
   }
 }
 
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time compilation),
+      // bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
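+    // Look up the vtable slot of the resolved method in the compiling class' superclass
+    // to find the method that an invoke-super would actually call.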
+    uint16_t vtable_index = resolved_method->GetMethodIndex();
+    ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+        vtable_index, class_linker->GetImagePointerSize());
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      // could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -742,22 +815,18 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
   const char* descriptor = dex_file_->GetMethodShorty(method_idx);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
 
   // Remove the return type from the 'proto'.
   size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
+  if (invoke_type != kStatic) {  // instance call
     // One extra argument for 'this'.
     number_of_arguments++;
   }
 
   MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
 
   // Special handling for string init.
   int32_t string_init_offset = 0;
@@ -780,7 +849,7 @@
         method_idx,
         target_method,
         dispatch_info,
-        original_invoke_type,
+        invoke_type,
         kStatic /* optimized_invoke_type */,
         HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
     return HandleStringInit(invoke,
@@ -791,23 +860,16 @@
                             descriptor);
   }
 
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (resolved_method == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
                                                      dex_pc,
                                                      method_idx,
-                                                     original_invoke_type);
+                                                     invoke_type);
     return HandleInvoke(invoke,
                         number_of_vreg_arguments,
                         args,
@@ -817,21 +879,26 @@
                         nullptr /* clinit_check */);
   }
 
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
   HInvoke* invoke = nullptr;
 
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
     // By default, consider that the called method implicitly requires
     // an initialization check of its declaring class.
     HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
         = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -847,24 +914,26 @@
                                                 method_idx,
                                                 target_method,
                                                 dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
+                                                invoke_type,
+                                                invoke_type,
                                                 clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeVirtual(arena_,
                                          number_of_arguments,
                                          return_type,
                                          dex_pc,
                                          method_idx,
-                                         table_index);
+                                         resolved_method->GetMethodIndex());
   } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeInterface(arena_,
                                            number_of_arguments,
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           table_index);
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -962,23 +1031,18 @@
 
 HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* resolved_method,
       uint32_t method_idx,
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+          self, *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
   Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 5ada93f..c3979f3 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -305,8 +305,10 @@
 
   HClinitCheck* ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* method,
       uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Build a HNewInstance instruction.
   bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
@@ -315,6 +317,10 @@
   bool IsInitialized(Handle<mirror::Class> cls) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Try to resolve a method using the class linker. Return null if a method could
+  // not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a98d9c6..ac6b5e8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -724,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1922,10 +1924,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -5818,16 +5828,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5850,6 +5850,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5881,11 +5907,15 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
@@ -5943,12 +5973,16 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -5970,7 +6004,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5996,6 +6034,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -6107,6 +6167,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 89de4f8..193add2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,31 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
   // Generate a read barrier for a heap reference within `instruction`.
   //
   // A read barrier for an object reference read from the heap is
@@ -419,7 +444,12 @@
   void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
 
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -441,6 +471,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
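
For illustration of the offset arithmetic described in the DexCacheArraysBaseLabels comment above: the value the linker eventually encodes into the MOVW/MOVT pair is the distance from the ADD's effective PC (the ADD's address plus 4 on Thumb2) to the dex cache arrays base. A minimal sketch, not part of this change and with hypothetical names:

#include <cstdint>

// Hypothetical helper: given the address the dex cache arrays base should
// resolve to and the code address of the "ADD base_reg, base_reg, PC"
// instruction, compute the two 16-bit halves a linker would place into the
// preceding MOVW (low half) and MOVT (high half).
struct MovwMovtImmediates {
  uint16_t movw_imm;
  uint16_t movt_imm;
};

inline MovwMovtImmediates SplitPcRelativeOffset(uint32_t base_address,
                                                uint32_t add_pc_address) {
  uint32_t diff = base_address - (add_pc_address + 4u);  // effective PC is ADD + 4
  return MovwMovtImmediates{static_cast<uint16_t>(diff & 0xFFFFu),
                            static_cast<uint16_t>(diff >> 16)};
}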
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ac16268..04acd9d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3618,8 +3618,12 @@
 }
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -3630,11 +3634,10 @@
 
   DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
-  // However this is not required in practice, as this is an
   // intermediate/temporary reference and because the current
   // concurrent copying collector keeps the from-space memory
   // intact/accessible until the end of the marking phase (the
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 934f24b..bc5eb31 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2986,8 +2986,13 @@
 }
 
 void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
+
   GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
@@ -2995,8 +3000,7 @@
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1fc09a8..2fb87d3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1929,8 +1929,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(),
-                                    Location::RequiresRegister());
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 
   if (codegen_->IsBaseline()) {
@@ -1970,6 +1969,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -4151,12 +4155,16 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 534ee1c..4618be9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -802,12 +802,17 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBasesIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
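
The GetOrCreateDexCacheArrayBase helper above relies on the standard hinted-insert idiom: a single lower_bound() both answers the membership question (via key_comp) and supplies the hint for PutBefore(), so the map is searched only once per dex file. A minimal sketch of the same pattern, with std::map standing in for ArenaSafeMap and purely illustrative key/value types:

#include <map>
#include <string>

// Look up `key`, creating and caching a default value on a miss.
inline int& GetOrCreate(std::map<std::string, int>& cache, const std::string& key) {
  auto lb = cache.lower_bound(key);
  if (lb != cache.end() && !cache.key_comp()(key, lb->first)) {
    return lb->second;  // Already present; reuse it.
  }
  // Not present; the hinted insert reuses the position found above.
  return cache.insert(lb, {key, 0})->second;
}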
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..015f910
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index b3b09d2..c16b872 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -739,7 +739,9 @@
   // created for constants which were untyped in DEX. Note that this test can be skipped for
   // a synthetic phi (indicated by lack of a virtual register).
   if (phi->GetRegNumber() != kNoRegNumber) {
-    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
       HPhi* other_phi = phi_it.Current()->AsPhi();
       if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
         if (phi->GetType() == other_phi->GetType()) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 48bcd10..e9fdb84 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -393,15 +393,18 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
     if (invoke->IsStatic()) {
       StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
     }
   }
 
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+  }
+
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
     StartAttributeStream("field_type") << field_access->GetFieldType();
   }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index b40ef5a..2ac1e15 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -425,9 +425,13 @@
     }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    // Determine what tests are needed.
+    // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+    // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
+    // code does not use the trip-count explicitly (since there could be an implicit relation
+    // between e.g. an invariant subscript and a not-taken condition).
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-    *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip);
+    *needs_taken_test = IsBodyTripCount(trip);
     // Code generation for taken test: generate the code when requested or otherwise analyze
     // if code generation is feasible when taken test is needed.
     if (taken_test != nullptr) {
@@ -512,10 +516,13 @@
             }
             break;
           case HInductionVarAnalysis::kFetch:
-            if (graph != nullptr) {
-              *result = info->fetch;  // already in HIR
+            if (info->fetch->GetType() == type) {
+              if (graph != nullptr) {
+                *result = info->fetch;  // already in HIR
+              }
+              return true;
             }
-            return true;
+            break;
           case HInductionVarAnalysis::kTripCountInLoop:
           case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
@@ -545,29 +552,42 @@
         }
         break;
       case HInductionVarAnalysis::kLinear: {
-          // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
-          // to avoid arithmetic wrap-around situations that are hard to guard against.
-          int32_t stride_value = 0;
-          if (GetConstant(info->op_a, &stride_value)) {
-            if (stride_value == 1 || stride_value == -1) {
-              const bool is_min_a = stride_value == 1 ? is_min : !is_min;
-              if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-                  GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-                if (graph != nullptr) {
-                  HInstruction* oper;
-                  if (stride_value == 1) {
-                    oper = new (graph->GetArena()) HAdd(type, opa, opb);
-                  } else {
-                    oper = new (graph->GetArena()) HSub(type, opb, opa);
-                  }
-                  *result = Insert(block, oper);
+        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+        // to avoid arithmetic wrap-around situations that are hard to guard against.
+        int32_t stride_value = 0;
+        if (GetConstant(info->op_a, &stride_value)) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
                 }
-                return true;
+                *result = Insert(block, oper);
               }
+              return true;
             }
           }
         }
         break;
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (extreme.is_known && extreme.a_constant == 0) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
       default:
         break;
     }
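
To make the new kWrapAround/kPeriodic case concrete, here is a hypothetical loop (shown as C++ rather than the Java input ART actually compiles) in which the array subscript is a periodic induction whose extreme values are constants, so the range can be generated without any trip-count-dependent code:

// Illustrative only: `x` alternates 0, 1, 0, 1, ..., so its minimum and
// maximum are the constants 0 and 1, which is exactly the situation the
// kWrapAround/kPeriodic branch can now handle when asked for the
// induction's extreme values.
void PeriodicSubscript(int* a, int n) {
  int x = 0;
  for (int i = 0; i < n; ++i) {
    a[x] = i;   // range analysis can bound x by [0, 1] here
    x = 1 - x;  // periodic induction
  }
}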
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0363f20..a4dcb3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -171,13 +171,37 @@
                                   const DexFile& dex_file,
                                   uint32_t referrer_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
     return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass());
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    // TODO: deal with proxy classes.
+    return DexFile::kDexNoIndex;
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    // Update the dex cache to ensure the class is in it. The generated code will
+    // assume that it is. We make it safe by updating the dex cache, as other
+    // dex files might also load the class, and there is no guarantee the dex
+    // cache of the dex file of the class will be updated.
+    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
+      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    }
+    return cls->GetDexTypeIndex();
+  } else {
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  }
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -192,6 +216,10 @@
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
     mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
@@ -210,53 +238,176 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    return TryInline(invoke_instruction, resolved_method);
+  }
+
+  // Check if we can statically find the method.
+  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+  if (actual_method != nullptr) {
+    return TryInline(invoke_instruction, actual_method);
+  }
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  // Under JIT, we should always know the caller.
+  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
+  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
+    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
+    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+    if (ic.IsUnitialized()) {
       VLOG(compiler) << "Interface or virtual call to "
                      << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
+                     << " is not hit and not inlined";
       return false;
-    }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
+    } else if (ic.IsMonomorphic()) {
+      MaybeRecordStat(kMonomorphicCall);
+      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+    } else if (ic.IsPolymorphic()) {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+    } else {
+      DCHECK(ic.IsMegamorphic());
       VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+                     << PrettyMethod(method_index, caller_dex_file)
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      invoke_instruction->GetDexPc());
+
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(load_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(field_get, load_class);
+  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed, and eventually propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
+  // TODO
+  VLOG(compiler) << "Unimplemented polymorphic inlining for "
+                 << PrettyMethod(resolved_method);
+  return false;
+}
+
+bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t method_index = FindMethodIndexIn(
+      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+  if (method_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to "
+                   << PrettyMethod(method)
+                   << " cannot be inlined because it is inaccessible to the caller";
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is too big to inline";
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -273,7 +424,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
     return false;
   }
 
@@ -284,7 +435,8 @@
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+                                 bool same_dex_file,
+                                 bool do_rtp) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -337,6 +489,7 @@
       invoke_type,
       graph_->IsDebuggable(),
       graph_->GetCurrentInstructionId());
+  callee_graph->SetArtMethod(resolved_method);
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -418,6 +571,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     outermost_graph_,
                      codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -529,9 +683,9 @@
   HNullConstant* null_constant = graph_->GetNullConstant();
   if (!null_constant->GetReferenceTypeInfo().IsValid()) {
     ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
     null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
   // Check the integrity of reference types and run another type propagation if needed.
@@ -550,14 +704,16 @@
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
 
-    // If the return type is a refinement of the declared type run the type propagation again.
-    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-    if (invoke_rti.IsStrictSupertypeOf(return_rti)
-        || (return_rti.IsExact() && !invoke_rti.IsExact())
-        || !return_replacement->CanBeNull()) {
-      ReferenceTypePropagation rtp_fixup(graph_, handles_);
-      rtp_fixup.Run();
+    if (do_rtp) {
+      // If the return type is a refinement of the declared type run the type propagation again.
+      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+      if (invoke_rti.IsStrictSupertypeOf(return_rti)
+          || (return_rti.IsExact() && !invoke_rti.IsExact())
+          || !return_replacement->CanBeNull()) {
+        ReferenceTypePropagation rtp_fixup(graph_, handles_);
+        rtp_fixup.Run();
+      }
     }
   }
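
The guard built by TryInlineMonomorphicCall above (HInstanceFieldGet of shadow$_klass_, HLoadClass, HNotEqual, HDeoptimize) makes the compiled code behave roughly like the following sketch. All names below are illustrative stand-ins, not ART types or APIs:

// Behavioural sketch only. The inlined body is valid solely for the single
// receiver class recorded in the inline cache, so a class check guards it
// and deoptimizes for any other receiver.
struct Class {};
struct Object { const Class* klass; };

extern const Class* kCachedClass;  // plays the role of ic.GetMonomorphicType()
void Deoptimize();                 // plays the role of HDeoptimize

inline void GuardedInlinedCall(Object* receiver) {
  if (receiver->klass != kCachedClass) {  // HNotEqual(HLoadClass, shadow$_klass_)
    Deoptimize();
  }
  // ... inlined body of the cached target ...
}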
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0f6a945..7b9fb73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -27,11 +27,13 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
@@ -40,6 +42,7 @@
            OptimizingCompilerStats* stats,
            size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
@@ -54,10 +57,33 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
+
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
+  // reference type propagation can run after the inlining.
+  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         bool same_dex_file);
+                         bool same_dex_file,
+                         bool do_rtp = true);
 
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b01324e..8340811 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -384,7 +384,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..02befc0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -121,6 +121,8 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
         } else if (instruction->CanThrow()) {
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index b5ac773..9b26de4 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1177,6 +1177,59 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
   DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -2079,6 +2132,26 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
@@ -2088,7 +2161,8 @@
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
       return os << "none";
     default:
-      return os << "unknown:" << static_cast<int>(rhs);
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
   }
 }
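
For illustration only (not part of the patch): a minimal standalone sketch of the
two steps MoveBeforeFirstUserAndOutOfLoops() performs, using a toy Block type in
place of HBasicBlock/CommonDominator. The real code additionally picks a concrete
insertion position inside the chosen block.

  #include <cassert>
  #include <vector>

  struct Block {
    Block* dominator = nullptr;  // Immediate dominator; nullptr for the entry block.
    int depth = 0;               // Depth in the dominator tree.
    bool in_loop = false;
  };

  // Step 1: find the common dominator of all blocks containing a user.
  // Step 2: walk up the dominator tree until the block is outside any loop
  // (the entry block is assumed to be outside all loops).
  Block* FindHoistBlock(const std::vector<Block*>& use_blocks) {
    assert(!use_blocks.empty());
    Block* target = use_blocks[0];
    for (Block* use : use_blocks) {
      while (use != target) {
        // Move whichever block is deeper up to its immediate dominator.
        if (use->depth >= target->depth) {
          use = use->dominator;
        } else {
          target = target->dominator;
        }
      }
    }
    while (target->in_loop) {
      target = target->dominator;
    }
    return target;
  }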
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d5110a7..9d3c88c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
   // Returns an instruction with the opposite boolean value from 'cond'.
   // The instruction has been inserted into the graph, either as a constant, or
   // before cursor.
@@ -479,6 +482,11 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -1095,7 +1103,12 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
@@ -1952,6 +1965,14 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the users' blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -2449,11 +2470,15 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
@@ -3418,7 +3443,7 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
@@ -3432,7 +3457,7 @@
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
+        optimized_invoke_type_(optimized_invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
         dispatch_info_(dispatch_info) { }
@@ -3478,7 +3503,11 @@
   // platform-specific special input, such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    optimized_invoke_type_ = invoke_type;
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -3501,6 +3530,7 @@
   }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3526,7 +3556,7 @@
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
   // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
@@ -3599,7 +3629,7 @@
   void RemoveInputAt(size_t index);
 
  private:
-  const InvokeType invoke_type_;
+  InvokeType optimized_invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3609,6 +3639,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
@@ -5566,6 +5597,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
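
The empty vs. populated FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) definitions above use
the usual X-macro pattern. A self-contained sketch of that pattern (the macro and
enable flag below are illustrative, not ART's):

  #include <iostream>

  #define ENABLE_ARM 1  // Stand-in for ART_ENABLE_CODEGEN_arm.

  #if ENABLE_ARM
  #define FOR_EACH_ARM_INSTRUCTION(M) M(ArmDexCacheArraysBase)
  #else
  #define FOR_EACH_ARM_INSTRUCTION(M)  // Empty: shared code never names the class.
  #endif

  #define PRINT_NAME(type) std::cout << #type << '\n';

  int main() {
    FOR_EACH_ARM_INSTRUCTION(PRINT_NAME)  // Prints "ArmDexCacheArraysBase".
    return 0;
  }
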
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
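
A small worked example of the lowest-offset heuristic documented in
UpdateElementOffset() above; the element offsets are made up for illustration:

  #include <algorithm>
  #include <cstddef>
  #include <cstdio>

  int main() {
    // Same sentinel value as element_offset_ in HArmDexCacheArraysBase.
    size_t base_offset = static_cast<size_t>(-1);
    const size_t requested[] = {0x130, 0x40, 0xa0};  // Hypothetical dex cache array offsets.
    for (size_t offset : requested) {
      base_offset = std::min(base_offset, offset);
    }
    // The base settles at 0x40, so the three loads use the non-negative
    // deltas +0xf0, +0x0 and +0x60 from it.
    std::printf("base offset: %#zx\n", base_offset);
    return 0;
  }
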
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8440813..3495603 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,10 @@
 #include <stdint.h>
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
@@ -423,7 +427,7 @@
     return;
   }
   HInliner* inliner = new (graph->GetArena()) HInliner(
-    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -435,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
@@ -500,12 +515,13 @@
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
@@ -558,6 +574,7 @@
       licm,
       induction,
       bce,
+      fold3,  // evaluates code generated by dynamic bce
       simplify3,
       lse,
       dce2,
@@ -607,8 +624,6 @@
   stack_map.resize(codegen->ComputeStackMapsSize());
   codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
-
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -643,7 +658,6 @@
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -749,8 +763,8 @@
     ArtMethod* art_method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
     // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
     if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
       interpreter_metadata = art_method->GetQuickenedInfo();
     }
   }
@@ -847,6 +861,7 @@
                    dex_file,
                    dex_cache));
     if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
       if (run_optimizations_) {
         method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
@@ -857,7 +872,7 @@
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
       MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
     }
   }
 
@@ -933,6 +948,7 @@
   if (stack_map_data == nullptr) {
     return false;
   }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
   const void* code = code_cache->CommitCode(
       self,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..6296eed 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,18 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +47,12 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
+  kInlinedMonomorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
   kLastStat
 };
 
@@ -66,20 +65,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +90,42 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCompiled : name = "Compiled"; break;
+      case kInlinedInvoke : name = "InlinedInvoke"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+      case kUnresolvedField : name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "MegamorphicCall"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
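
For reference, a tiny sketch of what the reworked Log() summary line computes,
using hypothetical counts:

  #include <cstdio>

  int main() {
    int attempted = 200, compiled = 185;  // Hypothetical stat counters.
    float percent = compiled * 100.0f / attempted;
    // Mirrors the new format: "Attempted compilation of 200 methods: 92.50% (185) compiled."
    std::printf("Attempted compilation of %d methods: %.2f%% (%d) compiled.\n",
                attempted, percent, compiled);
    return 0;
  }
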
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 808a1dc..b383f1e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -26,6 +26,15 @@
  public:
   explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
 
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
     BinaryFP(add);
@@ -72,7 +81,7 @@
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,22 +93,22 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializePCRelativeBasePointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer(insn);
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
@@ -111,7 +120,7 @@
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
-      InitializePCRelativeBasePointer(invoke);
+      InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
       invoke_static_or_direct->AddSpecialInput(base_);
@@ -133,6 +142,7 @@
 void PcRelativeFixups::Run() {
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
 
 }  // namespace x86
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0d05c49..dd34924 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -127,6 +127,87 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // Null constants are the only inputs allowed to have valid RTI because they are ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fix-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Test that if we are going to set RTI from invalid to Object, that
+    // instruction did not have any typed instructions in its def-use chain
+    // and therefore its type could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -136,6 +217,7 @@
   }
 
   ProcessWorklist();
+  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -534,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -588,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -653,7 +741,9 @@
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
+  if (!parent_rti.IsValid()) {
+    return;
+  }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
@@ -665,8 +755,6 @@
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
   }
-
-  return;
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -683,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -711,6 +799,7 @@
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -770,7 +859,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,6 +57,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
+  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079..5e1d1d9 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@
   }
 
   // TODO: Avoid CompilerDriver.
-  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
   MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
   int vtable_idx;
   uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@
       invoke->GetDexPc(),
       false /* update_stats: already updated in builder */,
       true /* enable_devirtualization */,
-      &invoke_type,
+      &optimized_invoke_type,
       &target_method,
       &vtable_idx,
       &direct_code,
       &direct_method);
-  DCHECK(success);
-  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
-  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
-  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+  if (!success) {
+    // TODO: try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen.
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
 
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 4d30b83..8f6b6d8 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -45,6 +45,14 @@
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
+  virtual bool Flush() = 0;
+
  private:
   const std::string location_;
 
diff --git a/compiler/output_stream_test.cc b/compiler/output_stream_test.cc
index 6104ccd..84c76f2 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/output_stream_test.cc
@@ -19,6 +19,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -48,6 +49,7 @@
     EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
     CheckOffset(10);
     EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
   }
 
   void CheckTestOutput(const std::vector<uint8_t>& actual) {
@@ -77,9 +79,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -99,4 +99,39 @@
   CheckTestOutput(output);
 }
 
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
 }  // namespace art
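
A usage sketch (not from the patch) combining the unique_ptr-owning
BufferedOutputStream with the new Flush() contract. std::make_unique stands in for
ART's MakeUnique helper, and the include paths are assumptions:

  #include <cstdint>
  #include <memory>
  #include <vector>

  #include "buffered_output_stream.h"
  #include "vector_output_stream.h"

  // Returns false if the write fails or if Flush() reports a delayed error.
  bool WriteBlob(const std::vector<uint8_t>& data, std::vector<uint8_t>* out) {
    art::BufferedOutputStream stream(
        std::make_unique<art::VectorOutputStream>("blob", out));
    return stream.WriteFully(data.data(), data.size()) && stream.Flush();
  }
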
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 7ad5b44..cdeb443 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2569,30 +2569,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encodings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
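
A worked example of the T3 encoding computed above. The bit constants
B31 | B30 | B29 | B28 | B25 | B22 are spelled out as 0xf2400000, and the result matches
the "movw r3, #4660 ; 0x1234" halfwords (f241 2334) in the updated test expectations:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint32_t rd = 3;          // r3
    const uint32_t imm16 = 0x1234;
    uint32_t imm4 = (imm16 >> 12) & 0xf;  // 0x1
    uint32_t i    = (imm16 >> 11) & 0x1;  // 0x0
    uint32_t imm3 = (imm16 >> 8) & 0x7;   // 0x2
    uint32_t imm8 = imm16 & 0xff;         // 0x34
    uint32_t encoding = 0xf2400000u | (i << 26) | (imm4 << 16) |
                        (imm3 << 12) | (rd << 8) | imm8;
    std::printf("%08x\n", encoding);  // Prints f2412334, i.e. halfwords "f241 2334".
    return 0;
  }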
 
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 1de51a2..5ae2cc2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -832,11 +832,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9246c82..886295e 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -439,14 +439,14 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
diff --git a/compiler/vector_output_stream.h b/compiler/vector_output_stream.h
index 3c5877c..a3c58d0 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/vector_output_stream.h
@@ -29,9 +29,9 @@
  public:
   VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
 
-  virtual ~VectorOutputStream() {}
+  ~VectorOutputStream() OVERRIDE {}
 
-  bool WriteFully(const void* buffer, size_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
     if (static_cast<size_t>(offset_) == vector_->size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_->insert(vector_->end(), &start[0], &start[byte_count]);
@@ -45,7 +45,11 @@
     return true;
   }
 
-  off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
 
  private:
   void EnsureCapacity(off_t new_offset) {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 89c2a7c..77211ce 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -55,8 +55,10 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -494,6 +496,7 @@
       app_image_(false),
       boot_image_(false),
       is_host_(false),
+      image_writer_(nullptr),
       driver_(nullptr),
       dump_stats_(false),
       dump_passes_(false),
@@ -1408,8 +1411,36 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
+      std::unique_ptr<ElfWriter> elf_writer =
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      if (!oat_writer->WriteRodata(rodata)) {
+        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      if (!oat_writer->WriteCode(text)) {
+        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer->GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer->GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer->GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
         return false;
       }
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d6a8c9..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,11 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+// from kernel <include/linux/threads.h>
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -132,7 +137,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 94eb82b..d20f7d5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,6 +28,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -116,7 +117,7 @@
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
     builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
 
     builder_->Start();
@@ -162,7 +163,7 @@
 
     builder_->End();
 
-    return builder_->Good() && output_stream->Flush();
+    return builder_->Good();
   }
 
   void Walk(Callback callback) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f7ed812..238d9f3 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -106,17 +106,16 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
@@ -149,8 +148,7 @@
         mirror::Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
           ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
+          if (HasSameNameAndSignature(interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
             result = interface_method;
             break;
           }
@@ -158,8 +156,9 @@
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(sizeof(void*))->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(sizeof(void*))));
   return result;
 }
 
@@ -299,7 +298,9 @@
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
         mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
+        bool from_code = false;
+        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
+        CHECK(!from_code);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
 
@@ -308,7 +309,7 @@
         if (pending_exception != nullptr) {
           self->SetException(pending_exception);
         }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5a2d6c3..8efad88 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -263,6 +263,9 @@
   mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature of the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 32e77b7..51f57c3 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,10 +69,25 @@
   return alloc_record_count;
 }
 
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      sizeof(void*));
+  }
+}
+
 class Breakpoint : public ValueObject {
  public:
   Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
-    : method_(method),
+    : method_(GetCanonicalMethod(method)),
       dex_pc_(dex_pc),
       deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
@@ -99,6 +114,12 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed in method should be considered the
+  // same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
   ArtMethod* method_;
@@ -306,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -1282,9 +1303,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -2763,7 +2784,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3214,7 +3235,7 @@
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3231,7 +3252,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3274,12 +3295,15 @@
 
   if (first_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // inlined or default, we deoptimize everything; otherwise we deoptimize only this method.
+    // We deoptimize everything for default methods because we do not know everywhere their
+    // copies are used; it is possible some of the copies could be inlined or otherwise missed.
+    // TODO Deoptimizing on default methods might not be necessary in all cases.
     // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
     // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    bool need_full_deoptimization = m->IsDefault() || IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining or copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
@@ -3359,7 +3383,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..dccb1da 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -598,8 +598,12 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+      // The super class does not have the method.
+      return nullptr;
+    }
+    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..c019cae 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -52,7 +52,7 @@
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
 
   QuickExceptionHandler exception_handler(self, true);
   exception_handler.DeoptimizeSingleFrame();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index abf9ac4..2c8ed88 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -685,7 +685,9 @@
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
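+    // The deoptimization context popped here is expected to have been pushed by compiled
+    // code explicitly requesting deoptimization, hence the CHECK on from_code below.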
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +714,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -754,7 +756,8 @@
   if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
@@ -1019,15 +1022,21 @@
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
-      } else {
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+      } else {
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+            called->GetMethodIndex(), sizeof(void*));
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..7d00094 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1526,10 +1526,9 @@
   }
 }
 
+// The function below may be called by the mutator thread itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1543,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The bracket index lock above guards the thread-local free list against a race with the
+      // GC thread merging the bulk free list into the thread-local free list in BulkFree.
+      // While the run is thread-local, the GC thread updates the thread-local free list in
+      // BulkFree, and that list is merged into the free list either when the run becomes full
+      // or when the run is revoked here, so the free list ends up updated. Once the run is no
+      // longer thread-local, the GC thread merges the bulk free list in the next BulkFree.
+      // Thus there is no need to merge the bulk free list into the free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index bc2c197..264cd2c 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1062,7 +1062,9 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d686f74..871fad7 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,10 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -408,7 +411,7 @@
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +422,16 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      DCHECK(instr->IsInvoke());
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+    } else {
+      // Nothing to do, the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +440,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Subsequent deoptimized shadow frames must go past the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..8e7f3da 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -37,7 +37,8 @@
                                        mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 27a0e2d..92aa86e 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -142,11 +142,24 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    return false;
+  }
+
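+  // Claim the method for compilation in the code cache; this fails if the method is
+  // already compiled, has no ProfilingInfo, or is currently being compiled.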
+  if (!code_cache_->NotifyCompilationOf(method, self)) {
+    return false;
+  }
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  code_cache_->DoneCompiling(method, self);
+  return success;
 }
 
 void Jit::CreateThreadPool() {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 804d69f..3342e92 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -536,7 +536,9 @@
       instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
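+      // Only detach the ProfilingInfo of methods that are not being compiled, so that an
+      // in-progress compilation keeps its info alive.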
+      if (!info->IsMethodBeingCompiled()) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+      }
     }
   }
 
@@ -577,12 +579,17 @@
       }
     }
 
-    // Free all profiling info.
-    for (ProfilingInfo* info : profiling_infos_) {
-      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
-    }
-    profiling_infos_.clear();
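+    // Copy the mspace pointer into a local so the lambda below can capture it by value.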
+    void* data_mspace = data_mspace_;
+    // Free all profiling infos of methods that were not being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [data_mspace] (ProfilingInfo* info) {
+        if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
 
     live_bitmap_.reset(nullptr);
     has_done_one_collection_ = true;
@@ -643,7 +650,7 @@
                                                       ArtMethod* method,
                                                       const std::vector<uint32_t>& entries) {
   size_t profile_info_size = RoundUp(
-      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
   ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
@@ -694,5 +701,25 @@
   MutexLock mu(Thread::Current(), lock_);
   return last_update_time_ns_;
 }
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
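+  // Nothing to do if the method is already compiled (its entry point is in the cache).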
+  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr || info->IsMethodBeingCompiled()) {
+    return false;
+  }
+  info->SetIsMethodBeingCompiled(true);
+  return true;
+}
+
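+// Clears the in-progress flag set by NotifyCompilationOf, whether or not compilation succeeded.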
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info->IsMethodBeingCompiled());
+  info->SetIsMethodBeingCompiled(false);
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index acd7c62..4032c7b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -66,6 +66,14 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2e52b1b..dcb346c 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -54,28 +54,29 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return true;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object int the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
   return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
   DCHECK(cache != nullptr);
+  return cache;
+}
 
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b13a315..ddaf02f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,6 +25,7 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
 
 namespace jit {
 class JitCodeCache;
@@ -34,6 +35,49 @@
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    return classes_[0].Read();
+  }
+
+  bool IsUnitialized() const {
+    return classes_[0].IsNull();
+  }
+
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+ private:
+  static constexpr uint16_t kIndividualCacheSize = 5;
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
@@ -67,44 +111,24 @@
     return method_;
   }
 
+  InlineCache* GetInlineCache(uint32_t dex_pc);
+
+  bool IsMethodBeingCompiled() const {
+    return is_method_being_compiled_;
+  }
+
+  void SetIsMethodBeingCompiled(bool value) {
+    is_method_being_compiled_ = value;
+  }
+
  private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
-
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
-
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
       : number_of_inline_caches_(entries.size()),
-        method_(method) {
+        method_(method),
+        is_method_being_compiled_(false) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+      cache_[i].dex_pc_ = entries[i];
     }
   }
 
@@ -114,6 +138,11 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 649df5f..d1687d7 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -2210,4 +2210,55 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
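+// Helper returning whether the monitor of the object referenced by jobj is currently held.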
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
+
 }  // namespace art
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4b2ac20..e133847 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -154,8 +154,10 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
@@ -164,9 +166,11 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -239,15 +243,16 @@
   std::string error_detail;
   CheckNonOverlapping(expected, limit, &error_detail);
 
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
@@ -379,7 +384,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -414,15 +420,17 @@
                                                            page_aligned_offset,
                                                            low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index a67a925..efce09a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -99,11 +99,12 @@
                             error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null, we do not print /proc/maps to the log when
+  // MapFileAtAddress fails. This keeps the failure path fast, since reading and printing
+  // /proc/maps can take several milliseconds in the worst case.
   //
   // On success, returns returns a MemMap instance.  On failure, returns null.
   static MemMap* MapFileAtAddress(uint8_t* addr,
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 680f4ac..83e594b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -52,10 +52,10 @@
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +64,140 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and its descendants:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable)
+// 2) Load() to try to open the file.
+// 3) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 4) PreSetup() for any steps that should be done before the final setup.
+// 5) Setup() to complete the procedure.
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbsslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +220,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -630,6 +422,486 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
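+// On the host, register/unregister an oat file location with the OatFileManager so that we
+// can tell whether a location is being opened for the first time; dlopen on the host would
+// return the same handle for a repeated load (see DlOpenOatFile::Load).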
+static bool RegisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
+    }
+    return false;
+  }
+  return true;
+}
+
+static void UnregisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
+    }
+  }
+}
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        first_oat_(RegisterOatFileLocation(filename)) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    UnregisterOatFileLocation(GetLocation());
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: Use a std::unique_ptr with a custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // Track the registration status (= was this the first oat file) for the location.
+  const bool first_oat_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
+    // stored in the first_oat_ flag.
+    if (!first_oat_) {
+      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
+  // but let's fallback to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef __ANDROID__
+    android_dlextinfo extinfo;
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
+                                                                //   (open oat files multiple
+                                                                //    times).
+                    ANDROID_DLEXT_FORCE_FIXED_VADDR;            // Take a non-zero vaddr as absolute
+                                                                //   (non-pic boot image).
+    if (oat_file_begin != nullptr) {                            //
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;     // Use the requested addr if
+      extinfo.reserved_addr = oat_file_begin;                   // vaddr = 0.
+    }                                                           //   (pic boot image).
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    UNUSED(oat_file_begin);
+#endif
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+  } context = { Begin(), &dlopen_mmaps_ };
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+  }
+#endif
+}
+
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
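+  // Nothing to do here; unlike dlopen, the ElfFile-based loading tracks its own mappings.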
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   std::string* error_msg);
+
+ private:
+  // Backing memory map for oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file, oat_file_begin, writable, executable, error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      std::string* error_msg) {
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             std::string* error_msg) {
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
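A small worked example of the suffix match above, with made-up paths: given abs_dex_location "/data/app/foo/base.apk" and rel_dex_location "base.apk:classes2.dex", the base "base.apk" is a suffix of the absolute location, so the result is "/data/app/foo/base.apk:classes2.dex"; any non-matching or already-absolute rel_dex_location comes back unchanged. A hedged standalone sketch of just that check, with the DexFile helpers replaced by a simple split on ':':

    #include <iostream>
    #include <string>

    // Illustrative only: mirrors the suffix comparison in
    // ResolveRelativeEncodedDexLocation without the DexFile utilities.
    std::string ResolveForExample(const std::string& abs, const std::string& rel) {
      size_t colon = rel.find(':');
      std::string base = rel.substr(0, colon);                       // "base.apk"
      std::string multidex_suffix =
          colon == std::string::npos ? "" : rel.substr(colon);       // ":classes2.dex"
      std::string target_suffix = "/" + base;
      if (abs.size() > target_suffix.size() &&
          abs.compare(abs.size() - target_suffix.size(), std::string::npos, target_suffix) == 0) {
        return abs + multidex_suffix;
      }
      return rel;
    }

    int main() {
      // Prints "/data/app/foo/base.apk:classes2.dex".
      std::cout << ResolveForExample("/data/app/foo/base.apk", "base.apk:classes2.dex") << "\n";
      return 0;
    }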
+
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+  std::unique_ptr<OatFile> ret;
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 /* writable */ false,
+                                                                 executable,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+  }
+
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  // Another independent reason is the absolute placement of boot.oat. On the host, dlopen
+  // usually honors the virtual address encoded in the ELF file only for ET_EXEC files,
+  // not ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                /* writable */ false,
+                                                                executable,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
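A hedged sketch of how a caller might exercise this fallback; the paths are hypothetical and the snippet assumes the ART runtime headers (OatFile, LOG) are available:

    // Sketch only: open an oat file non-executably. Open() tries dlopen first
    // and falls back to the internal ElfFile loader as described above.
    std::string error_msg;
    std::unique_ptr<OatFile> oat_file(OatFile::Open(
        "/data/dalvik-cache/arm/system@app@Example.apk@classes.dex",  // hypothetical filename
        "/system/app/Example.apk",                                    // hypothetical location
        /* requested_base */ nullptr,
        /* oat_file_begin */ nullptr,
        /* executable */ false,
        /* abs_dex_location */ nullptr,
        &error_msg));
    if (oat_file == nullptr) {
      LOG(ERROR) << "Could not open oat file: " << error_msg;
    }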
+
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /* requested_base */ nullptr,
+                                 /* oat_file_begin */ nullptr,
+                                 /* writable */ true,
+                                 /* executable */ false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 /* requested_base */ nullptr,
+                                 /* oat_file_begin */ nullptr,
+                                 /* writable */ false,
+                                 /* executable */ false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 0a77654..dbd7541 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,7 +40,7 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
 
@@ -74,7 +74,7 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
@@ -85,12 +85,6 @@
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
-
   const std::string& GetLocation() const {
     return location_;
   }
@@ -260,35 +254,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +278,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -356,6 +313,7 @@
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -426,6 +384,7 @@
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 585c7c4..7f4519c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -559,7 +559,9 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit, and the growth limit should always be <= the heap size.
+  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
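The new check boils down to a simple rule: treat 0 as "no limit" and never let the growth limit exceed the maximum heap size. A standalone sketch of that rule, with illustrative names rather than the runtime's option types:

    #include <cstddef>

    // Mirrors the clamp above: 0 means "no limit", and an over-large value is
    // capped at the maximum heap size.
    size_t EffectiveGrowthLimit(size_t parsed_growth_limit, size_t max_heap_size) {
      if (parsed_growth_limit == 0u || parsed_growth_limit > max_heap_size) {
        return max_heap_size;
      }
      return parsed_growth_limit;
    }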
 
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1552318..9cb37ee 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -283,7 +283,12 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -356,6 +361,7 @@
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
       }
       return true;
     }
@@ -586,6 +592,7 @@
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -614,6 +621,14 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
+  // Compiled code made an explicit deoptimization. Switch the method's code to the
+  // quick-to-interpreter bridge and clear its counter so the JIT can compile it again later.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  deopt_method->ClearCounter();
+  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+      deopt_method, GetQuickToInterpreterBridge());
+
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
   #ifdef __LP64__
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 931e581..fe8eb0d 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -775,7 +775,7 @@
   std::unique_ptr<const OatFile> oat_file(
       OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
   if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
+    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 63e6326..90539b4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -164,14 +164,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +195,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +229,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
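The deoptimization context records form a per-thread LIFO stack, so each push is matched by a pop; with this change the pop also reports whether the deoptimization was explicitly requested by compiled code. A hedged sketch of the pairing, where self is assumed to be the current Thread* and the mutator lock is held:

    // Sketch only: record an explicit deoptimization from compiled code, then
    // retrieve it when transferring control to the interpreter.
    JValue result;
    self->PushDeoptimizationContext(result,
                                    /* is_reference */ false,
                                    /* from_code */ true,
                                    /* exception */ nullptr);
    // ... shadow frames are built and pushed here ...
    mirror::Throwable* pending_exception = nullptr;
    bool from_code = false;
    self->PopDeoptimizationContext(&result, &pending_exception, &from_code);
    // from_code is now true, matching the explicit request pushed above.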
 
@@ -2546,7 +2561,8 @@
     if (is_deoptimization) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /* is_reference */ false, /* from_code */ false, exception);
     }
   }
   // Don't leave exception visible while we try to find the handler, which may cause class
diff --git a/runtime/thread.h b/runtime/thread.h
index 4624f27..3abb3cf 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -849,10 +849,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly requested by
+  // compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index ffeae7d..c3d2759 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -652,20 +652,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -685,27 +684,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -725,25 +722,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -770,18 +767,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -816,14 +814,18 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -831,8 +833,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Several HDeoptimize will be added. Two for each index.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -859,7 +861,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -874,23 +875,27 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
+    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -914,20 +919,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -955,26 +959,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop:
+    // two for the index and one for the null check on matrix.
+    // The same applies to the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -994,15 +1000,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  //  Simplification removes the redundant check
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Three HDeoptimize will be added. Two for the index
+    // and one for null check on array.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index ec63057..f1f80ca 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -618,6 +618,57 @@
     getSuper();
   }
 
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..5080f142 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -19,23 +19,25 @@
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeVirtual
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a8b6307 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
   /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 58c92f1..34d2f64 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -29,6 +29,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linear(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -41,6 +42,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDown(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -53,6 +55,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearObscure(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -66,6 +69,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearVeryObscure(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -79,6 +83,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWhile(int[] x) {
     int i = 0;
     int result = 0;
@@ -92,6 +97,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearThreeWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -108,6 +114,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearFourWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -128,6 +135,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinear(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -143,6 +151,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -162,6 +171,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearWithParameter(int n) {
     int[] x = new int[n];
     for (int i = 0; i < n; i++) {
@@ -174,6 +184,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearCopy(int x[]) {
     int n = x.length;
     int y[] = new int[n];
@@ -183,10 +194,55 @@
     return y;
   }
 
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i+=2) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithCompoundStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
     int result = 0;
@@ -202,6 +258,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -218,6 +275,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -234,6 +292,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -250,6 +309,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -266,6 +326,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -279,6 +340,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -292,6 +354,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -306,6 +369,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -320,6 +384,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -330,10 +395,31 @@
     return result;
   }
 
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+       result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicIdiom(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -350,6 +436,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence2(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -372,6 +459,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence4(int tc) {
     int[] x = { 1, 3, 5, 7 };
     // Loop with periodic sequence (0, 1, 2, 3).
@@ -395,6 +483,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -408,6 +497,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -421,6 +511,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -434,6 +525,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -448,6 +540,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -461,6 +554,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -474,6 +568,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -487,6 +582,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -501,6 +597,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
@@ -511,6 +608,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
@@ -521,6 +619,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
@@ -533,6 +632,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
@@ -541,6 +641,306 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
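Conceptually, dynamic BCE turns the loop above into a guarded fast path: the three Deoptimize instructions the checker expects correspond to a hoisted null check and the two range tests, after which the body runs without per-iteration checks. A rough C++ analogue of the transformed shape, purely illustrative and not what the compiler actually emits:

    // Sketch: loop-invariant guards stand in for the Deoptimize instructions;
    // in ART a failed guard falls back to the interpreter, which still performs
    // the checks and can throw ArrayIndexOutOfBoundsException.
    int LinearDynamicBce1Shape(const int* x, int x_length, int lo, int hi) {
      if (x == nullptr || lo < 0 || hi > x_length) {
        return -1;  // Stand-in for the deoptimizing slow path.
      }
      int result = 0;
      for (int i = lo; i < hi; i++) {
        result += x[i];  // No null or bounds check inside the loop.
      }
      return result;
    }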
+
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = max int. Since i is also used
+    // as subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the index is not used as subscript,
+    // and dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: ArrayGet
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test array length on a before the loop so that only bounds checks
+    // on constant subscripts remain, making them a viable candidate for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them candidates for deoptimization based on constant indices.
+        // The compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[0][i] = 1;
+        a[1][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
+                                                  boolean[] r,
+                                                  byte[] s,
+                                                  char[] t,
+                                                  short[] u,
+                                                  int[] v,
+                                                  long[] w,
+                                                  float[] x,
+                                                  double[] y,
+                                                  Integer[] z, int lo, int hi) {
+    int result = 0;
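+    // A single loop mixing the unit-stride load q[i] with constant-index loads of
+    // every element type; the checker above expects all bounds checks to be gone
+    // after BCE, replaced by deoptimization guards.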
+    for (int i = lo; i < hi; i++) {
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+    }
+    return result;
+  }
+
   //
   // Verifier.
   //
@@ -596,6 +996,9 @@
     }
 
     // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
     expectEquals(66, linearWithVeryLargePositiveStride());
@@ -608,6 +1011,7 @@
     expectEquals(55, linearDoWhileUp());
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
+    expectEquals(55, invariantFromPreLoop(x, 1));
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
@@ -690,6 +1094,86 @@
       sResult += 1000;
     }
     expectEquals(1055, sResult);
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
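+    // The write loop in dynamicBCEAndConstantIndices skips i == 0, so a[*][0] stays 0.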
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[2][10];
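+    // With only two rows, the guarded store to a[99][i] throws on its first execution (i == 1).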
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(1, a[0][1]);
+    expectEquals(2, a[1][1]);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    Integer[] x9 = { 9 };
+    expectEquals(505,
+        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..3f65d5a 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -35,7 +35,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
@@ -56,7 +56,7 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      InvokeVirtual
   /// CHECK:      test
   /// CHECK:      jz/eq
   // Check that we don't try to compare the classes.
diff --git a/test/551-invoke-super/expected.txt b/test/551-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super referencing a non-super class to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
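+    // run() uses invoke-super, which resolves to the abstract SuperClass.returnInt(),
+    // so the reflective call below is expected to throw AbstractMethodError.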
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
diff --git a/test/552-checker-sharpening/expected.txt b/test/552-checker-sharpening/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..d50edd8
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use a PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
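+    // The return values only sanity-check the arithmetic; the interesting verification
+    // is done by the checker assertions on each method above.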
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+  }
+}
diff --git a/test/552-invoke-non-existent-super/expected.txt b/test/552-invoke-non-existent-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super referencing the current class; the super class does not
+    # declare returnInt(), so resolution of the target method fails.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
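+    // run() uses invoke-super on returnInt(), which the super class does not declare,
+    // so the reflective call below is expected to throw NoSuchMethodError.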
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
diff --git a/test/553-invoke-super/expected.txt b/test/553-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke-super on this class to confuse the runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
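+    // invoke-super in run() must resolve to SuperClass.$noinline$returnInt(), which
+    // returns 42, rather than the overriding method in InvokeSuper, which returns 777.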
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c830ad4..0925d36 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -258,8 +258,10 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 47fc50f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -128,7 +128,7 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test