Merge "Revert "Revert "Revert "Use Jack lang dev version for compiling art tests""""
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index ff41736..0afec2d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -74,6 +74,7 @@
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
 ART_GTEST_oat_file_assistant_test_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 ART_GTEST_oat_file_test_DEX_DEPS := Main MultiDex
+ART_GTEST_oat_test_DEX_DEPS := Main
 ART_GTEST_object_test_DEX_DEPS := ProtoCompare ProtoCompare2 StaticsFromCode XandY
 ART_GTEST_proxy_test_DEX_DEPS := Interfaces
 ART_GTEST_reflection_test_DEX_DEPS := Main NonStaticLeafMethods StaticLeafMethods
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 0940a80..3ca518b 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -25,12 +25,13 @@
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    Flush();
+    if (!Flush()) {
+      return false;
+    }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    bool success = Flush();
-    if (!success) {
+    if (!Flush()) {
       return false;
     }
   }
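Note: the fix above matters most on the large-write path, where WriteFully() previously discarded the result of flushing the buffered bytes before the direct write. A standalone sketch of the corrected control flow, using plain stdio stand-ins rather than the real ART stream classes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Simplified stand-in for BufferedOutputStream (toy, not ART code).
    class BufferedWriter {
     public:
      explicit BufferedWriter(FILE* out) : out_(out), used_(0) {}

      bool Flush() {
        size_t n = fwrite(buffer_, 1, used_, out_);
        bool ok = (n == used_);
        used_ = 0;  // Drop the buffer either way; the caller sees the failure.
        return ok;
      }

      bool WriteFully(const void* data, size_t count) {
        if (count > sizeof(buffer_)) {
          if (!Flush()) {  // The fix: a failed flush aborts the write.
            return false;
          }
          return fwrite(data, 1, count, out_) == count;  // Bypass the buffer.
        }
        if (used_ + count > sizeof(buffer_) && !Flush()) {
          return false;
        }
        memcpy(buffer_ + used_, data, count);
        used_ += count;
        return true;
      }

     private:
      FILE* out_;
      uint8_t buffer_[8 * 1024];
      size_t used_;
    };

The header change below additionally makes Flush() public, so callers can force out the tail of the buffer and check the result themselves.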
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index 15fc033..b447f41 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -36,11 +36,11 @@
 
   virtual off_t Seek(off_t offset, Whence whence);
 
+  bool Flush();
+
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  bool Flush();
-
   OutputStream* const out_;
 
   uint8_t buffer_[kBufferSize];
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 6fd4575..508b04a 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -48,11 +48,11 @@
     // Pretty-print CFI opcodes.
     constexpr bool is64bit = false;
     dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
-    dwarf::WriteDebugFrameCIE(is64bit, dwarf::DW_EH_PE_absptr, dwarf::Reg(8),
-                              initial_opcodes, kCFIFormat, &debug_frame_data_);
+    dwarf::WriteCIE(is64bit, dwarf::Reg(8),
+                    initial_opcodes, kCFIFormat, &debug_frame_data_);
     std::vector<uintptr_t> debug_frame_patches;
-    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
-                              kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+    dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
+                    kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
     auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 8788dc1..3a9ce1b 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -22,6 +22,10 @@
 
 namespace art {
 
+namespace jit {
+  class JitCodeCache;
+}
+
 class ArtMethod;
 class Backend;
 struct CompilationUnit;
@@ -58,6 +62,13 @@
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
+  virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
+                          jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
+                          ArtMethod* method ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return false;
+  }
+
   virtual uintptr_t GetEntryPointOf(ArtMethod* method) const
      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
 
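The new JitCompile() entry point is a default-to-unsupported virtual hook: backends that can JIT override it, while everything else inherits the `return false` above and the runtime falls back accordingly. A toy model of the pattern (self-contained C++ with hypothetical names, not the real ART classes):

    #include <iostream>

    // Base class provides the "not supported" default, mirroring the
    // default added to Compiler above.
    class CompilerModel {
     public:
      virtual ~CompilerModel() {}
      virtual bool JitCompile() { return false; }
    };

    class JitCapableCompiler : public CompilerModel {
     public:
      bool JitCompile() override { return true; }  // Pretend we emitted code.
    };

    int main() {
      CompilerModel base;
      JitCapableCompiler jit;
      std::cout << base.JitCompile() << "\n";  // 0: caller must interpret.
      std::cout << jit.JitCompile() << "\n";   // 1: JIT-compiled.
    }

This also explains the compiler_driver.cc changes further down: with the hook in place, CompileMethod() keeps only the AOT path and CompileArtMethod() is removed.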
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 036da2e..b1acf5e 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -447,7 +447,7 @@
 
 static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
   // Emit relative calls anywhere in the image or within a dex file otherwise.
-  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
+  return cu->compiler_driver->IsBootImage() || cu->dex_file == target_method.dex_file;
 }
 
 /*
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d055b37..aa5e411 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -341,7 +341,7 @@
                                Compiler::Kind compiler_kind,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures* instruction_set_features,
-                               bool image, std::unordered_set<std::string>* image_classes,
+                               bool boot_image, std::unordered_set<std::string>* image_classes,
                                std::unordered_set<std::string>* compiled_classes,
                                std::unordered_set<std::string>* compiled_methods,
                                size_t thread_count, bool dump_stats, bool dump_passes,
@@ -361,7 +361,7 @@
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
-      image_(image),
+      boot_image_(boot_image),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
@@ -383,7 +383,7 @@
 
   compiler_->Init();
 
-  CHECK_EQ(image_, image_classes_.get() != nullptr);
+  CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
@@ -559,7 +559,7 @@
     }
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
-  } else if (Runtime::Current()->IsAotCompiler()) {
+  } else {
     const VerifiedMethod* verified_method =
         driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
     bool compile = compilation_enabled &&
@@ -598,13 +598,6 @@
               ? dex_to_dex_compilation_level
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
-  } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
-                                                     class_def_idx, method_idx, class_loader,
-                                                     dex_file, dex_cache);
   }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -696,42 +689,6 @@
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) {
-  DCHECK_EQ(method,
-            method->GetInterfaceMethodIfProxy(
-                Runtime::Current()->GetClassLinker()->GetImagePointerSize()));
-  const uint32_t method_idx = method->GetDexMethodIndex();
-  const uint32_t access_flags = method->GetAccessFlags();
-  const InvokeType invoke_type = method->GetInvokeType();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      method->GetDeclaringClass()->GetClassLoader()));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-  jobject jclass_loader = class_loader.ToJObject();
-  const DexFile* dex_file = method->GetDexFile();
-  const uint16_t class_def_idx = method->GetClassDefIndex();
-  const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-  optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
-      GetDexToDexCompilationLevel(self, *this, class_loader, *dex_file, class_def);
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  // Go to native so that we don't block GC during compilation.
-  ScopedThreadSuspension sts(self, kNative);
-  CompileMethod(self,
-                this,
-                code_item,
-                access_flags,
-                invoke_type,
-                class_def_idx,
-                method_idx,
-                jclass_loader,
-                *dex_file,
-                dex_to_dex_compilation_level,
-                true,
-                dex_cache);
-  auto* compiled_method = GetCompiledMethod(MethodReference(dex_file, method_idx));
-  return compiled_method;
-}
-
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
@@ -781,7 +738,7 @@
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // NOTE: Currently unreachable, all callers check IsImage().
     return false;
   } else {
@@ -790,7 +747,7 @@
 }
 
 bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -801,7 +758,7 @@
 }
 
 bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -889,7 +846,7 @@
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
   CHECK(timings != nullptr);
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     return;
   }
 
@@ -1118,7 +1075,7 @@
 };
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
-  if (IsImage()) {
+  if (IsBootImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
 
     Runtime* runtime = Runtime::Current();
@@ -1145,7 +1102,7 @@
     // Having the klass reference here implies that the klass is already loaded.
     return true;
   }
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // Assume loaded only if klass is in the boot image. App classes cannot be assumed
     // loaded because we don't even know what class loader will be used to load them.
     bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace();
@@ -1157,7 +1114,7 @@
 }
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsImage() &&
+  if (IsBootImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     {
       ScopedObjectAccess soa(Thread::Current());
@@ -1183,7 +1140,7 @@
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
@@ -1300,7 +1257,7 @@
   if (compiling_boot) {
     // boot -> boot class pointers.
     // True if the class is in the image at boot compiling time.
-    const bool is_image_class = IsImage() && IsImageClass(
+    const bool is_image_class = IsBootImage() && IsImageClass(
         dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
     // True if pc relative load works.
     if (is_image_class && support_boot_image_fixup) {
@@ -1548,7 +1505,7 @@
   }
   if (!use_dex_cache && force_relocations) {
     bool is_in_image;
-    if (IsImage()) {
+    if (IsBootImage()) {
       is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
     } else {
       is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
@@ -2019,7 +1976,7 @@
 
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  if (IsImage()) {
+  if (IsBootImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
@@ -2101,8 +2058,8 @@
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
-      DCHECK(!manager_->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class "
-          << PrettyClass(klass.Get()) << " failed to fully verify.";
+      DCHECK(!manager_->GetCompiler()->IsBootImage() || klass->IsVerified())
+          << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify.";
     }
     soa.Self()->AssertNoPendingException();
   }
@@ -2222,7 +2179,7 @@
           if (!klass->IsInitialized()) {
             // We need to initialize static fields, we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            bool can_init_static_fields = manager_->GetCompiler()->IsImage() &&
+            bool can_init_static_fields = manager_->GetCompiler()->IsBootImage() &&
                 manager_->GetCompiler()->IsImageClass(descriptor) &&
                 !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
             if (can_init_static_fields) {
@@ -2286,7 +2243,7 @@
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   size_t thread_count;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
     thread_count = 1U;
   } else {
@@ -2304,7 +2261,7 @@
     CHECK(dex_file != nullptr);
     InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
-  if (IsImage()) {
+  if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
     Runtime::Current()->GetHeap()->CollectGarbage(true);
   }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 4ed4dc6..5683b03 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -92,7 +92,7 @@
                  Compiler::Kind compiler_kind,
                  InstructionSet instruction_set,
                  const InstructionSetFeatures* instruction_set_features,
-                 bool image, std::unordered_set<std::string>* image_classes,
+                 bool boot_image, std::unordered_set<std::string>* image_classes,
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
@@ -119,9 +119,6 @@
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
-  CompiledMethod* CompileArtMethod(Thread* self, ArtMethod*)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!compiled_methods_lock_) WARN_UNUSED;
-
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -156,8 +153,8 @@
   }
 
   // Are we compiling and creating an image file?
-  bool IsImage() const {
-    return image_;
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   const std::unordered_set<std::string>* GetImageClasses() const {
@@ -637,7 +634,7 @@
   // in the .oat_patches ELF section if requested in the compiler options.
   size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
 
-  const bool image_;
+  const bool boot_image_;
 
   // If image_ is true, specifies the classes that will be included in
   // the image. Note if image_classes_ is null, all classes are
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 3ba380e..a412a99 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -122,12 +122,12 @@
   DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
 
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(is64bit ? 16 : 8),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(is64bit ? 16 : 8),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
   CheckObjdumpOutput(is64bit, "-W");
@@ -136,14 +136,14 @@
 TEST_F(DwarfTest, DebugFrame64) {
   constexpr bool is64bit = true;
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   DebugFrameOpCodeWriter<> opcodes;
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
   DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
@@ -176,12 +176,12 @@
   DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)");
   DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)");
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   CheckObjdumpOutput(is64bit, "-W");
 }
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index f819c49..5464ed9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -59,38 +59,27 @@
   std::vector<std::string> Objdump(const char* args) {
     // Write simple elf file with just the DWARF sections.
     InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
-    class NoCode : public CodeOutput {
-      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
-    } no_code;
-    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
-    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
-    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_frame(".debug_frame", SHT_PROGBITS, 0, nullptr, 0, 8, 0);
+    ScratchFile file;
+    FileOutputStream output_stream(file.GetFile());
+    ElfBuilder<ElfTypes> builder(isa, &output_stream);
+    builder.Start();
     if (!debug_info_data_.empty()) {
-      debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterSection(&debug_info);
+      builder.WriteSection(".debug_info", &debug_info_data_);
     }
     if (!debug_abbrev_data_.empty()) {
-      debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterSection(&debug_abbrev);
+      builder.WriteSection(".debug_abbrev", &debug_abbrev_data_);
     }
     if (!debug_str_data_.empty()) {
-      debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterSection(&debug_str);
+      builder.WriteSection(".debug_str", &debug_str_data_);
     }
     if (!debug_line_data_.empty()) {
-      debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterSection(&debug_line);
+      builder.WriteSection(".debug_line", &debug_line_data_);
     }
     if (!debug_frame_data_.empty()) {
-      debug_frame.SetBuffer(debug_frame_data_);
-      builder.RegisterSection(&debug_frame);
+      builder.WriteSection(".debug_frame", &debug_frame_data_);
     }
-    ScratchFile file;
-    builder.Write(file.GetFile());
+    builder.End();
+    EXPECT_TRUE(builder.Good());
 
     // Read the elf file back using objdump.
     std::vector<std::string> lines;
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index f3fba4b..883d756 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -38,15 +38,14 @@
 
 // Write common information entry (CIE) to .debug_frame or .eh_frame section.
 template<typename Vector>
-void WriteDebugFrameCIE(bool is64bit,
-                        ExceptionHeaderValueApplication address_type,
-                        Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Vector>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame) {
+void WriteCIE(bool is64bit,
+              Reg return_address_register,
+              const DebugFrameOpCodeWriter<Vector>& opcodes,
+              CFIFormat format,
+              std::vector<uint8_t>* buffer) {
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
 
-  Writer<> writer(debug_frame);
+  Writer<> writer(buffer);
   size_t cie_header_start_ = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
@@ -57,17 +56,17 @@
   writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
   writer.PushUleb128(1);  // z: Augmentation data size.
   if (is64bit) {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8);   // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8);  // R: Pointer encoding.
     }
   } else {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);   // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
     }
   }
@@ -78,30 +77,44 @@
 
 // Write frame description entry (FDE) to .debug_frame or .eh_frame section.
 inline
-void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
-                        uint64_t initial_address, uint64_t address_range,
-                        const ArrayRef<const uint8_t>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame,
-                        std::vector<uintptr_t>* debug_frame_patches) {
-  Writer<> writer(debug_frame);
+void WriteFDE(bool is64bit,
+              uint64_t section_address,  // Absolute address of the section.
+              uint64_t cie_address,  // Absolute address of last CIE.
+              uint64_t code_address,
+              uint64_t code_size,
+              const ArrayRef<const uint8_t>& opcodes,
+              CFIFormat format,
+              uint64_t buffer_address,  // Address of buffer in linked application.
+              std::vector<uint8_t>* buffer,
+              std::vector<uintptr_t>* patch_locations) {
+  CHECK_GE(cie_address, section_address);
+  CHECK_GE(buffer_address, section_address);
+
+  Writer<> writer(buffer);
   size_t fde_header_start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   if (format == DW_EH_FRAME_FORMAT) {
-    uint32_t cie_pointer = writer.data()->size() - cie_offset;
+    uint32_t cie_pointer = (buffer_address + buffer->size()) - cie_address;
     writer.PushUint32(cie_pointer);
   } else {
-    uint32_t cie_pointer = cie_offset;
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    uint32_t cie_pointer = cie_address - section_address;
     writer.PushUint32(cie_pointer);
   }
-  // Relocate initial_address, but not address_range (it is size).
-  debug_frame_patches->push_back(writer.data()->size());
-  if (is64bit) {
-    writer.PushUint64(initial_address);
-    writer.PushUint64(address_range);
+  if (format == DW_EH_FRAME_FORMAT) {
+    // .eh_frame encodes the location as a relative address.
+    code_address -= buffer_address + buffer->size();
   } else {
-    writer.PushUint32(initial_address);
-    writer.PushUint32(address_range);
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    // Relocate code_address, since it is written as an absolute address.
+    patch_locations->push_back(buffer_address + buffer->size() - section_address);
+  }
+  if (is64bit) {
+    writer.PushUint64(code_address);
+    writer.PushUint64(code_size);
+  } else {
+    writer.PushUint32(code_address);
+    writer.PushUint32(code_size);
   }
   writer.PushUleb128(0);  // Augmentation data size.
   writer.PushData(opcodes);
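With the reworked WriteFDE() above, the CIE pointer and the code address are computed differently per format. A standalone arithmetic check with invented sample addresses (the section starts at 0x1000, the CIE sits at its start, and this FDE's length field was just written at 0x1040):

    #include <cstdint>
    #include <iostream>

    int main() {
      uint64_t section_address = 0x1000;  // Invented sample layout.
      uint64_t cie_address     = 0x1000;
      uint64_t buffer_address  = 0x1040;
      uint64_t buffer_size     = 4;       // 4-byte length field already pushed.
      uint64_t code_address    = 0x9000;  // Absolute address of the code.

      // .eh_frame: both fields are relative to their own position.
      uint32_t eh_cie_pointer =
          static_cast<uint32_t>((buffer_address + buffer_size) - cie_address);
      uint64_t eh_code = code_address - (buffer_address + buffer_size + 4);

      // .debug_frame: the CIE pointer is a plain section offset, and the
      // absolute code address is recorded in patch_locations for relocation.
      uint32_t df_cie_pointer =
          static_cast<uint32_t>(cie_address - section_address);
      uint64_t df_patch_location =
          buffer_address + buffer_size + 4 - section_address;

      std::cout << std::hex << eh_cie_pointer << " " << eh_code << " "
                << df_cie_pointer << " " << df_patch_location << "\n";
      // Prints: 44 7fb8 0 48
    }

The extra `+ 4` accounts for the 4-byte CIE-pointer field written between the length field and the address field.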
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bbd962f..895dfcc 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -21,27 +21,58 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/casts.h"
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
+#include "leb128.h"
 
 namespace art {
 
-class CodeOutput {
- public:
-  virtual bool Write(OutputStream* out) = 0;
-  virtual ~CodeOutput() {}
-};
-
 // Writes ELF file.
-// The main complication is that the sections often want to reference
-// each other.  We solve this by writing the ELF file in two stages:
-//  * Sections are asked about their size, and overall layout is calculated.
-//  * Sections do the actual writes which may use offsets of other sections.
+//
+// The basic layout of the elf file:
+//   Elf_Ehdr                    - The ELF header.
+//   Elf_Phdr[]                  - Program headers for the linker.
+//   .rodata                     - DEX files and oat metadata.
+//   .text                       - Compiled code.
+//   .bss                        - Zero-initialized writeable section.
+//   .dynstr                     - Names for .dynsym.
+//   .dynsym                     - A few oat-specific dynamic symbols.
+//   .hash                       - Hash-table for .dynsym.
+//   .dynamic                    - Tags which let the linker locate .dynsym.
+//   .strtab                     - Names for .symtab.
+//   .symtab                     - Debug symbols.
+//   .eh_frame                   - Unwind information (CFI).
+//   .eh_frame_hdr               - Index of .eh_frame.
+//   .debug_frame                - Unwind information (CFI).
+//   .debug_frame.oat_patches    - Addresses for relocation.
+//   .debug_info                 - Debug information.
+//   .debug_info.oat_patches     - Addresses for relocation.
+//   .debug_abbrev               - Decoding information for .debug_info.
+//   .debug_str                  - Strings for .debug_info.
+//   .debug_line                 - Line number tables.
+//   .debug_line.oat_patches     - Addresses for relocation.
+//   .text.oat_patches           - Addresses for relocation.
+//   .shstrtab                   - Names of ELF sections.
+//   Elf_Shdr[]                  - Section headers.
+//
+// Some sections are optional (the debug sections in particular).
+//
+// We try to write the section data directly into the file without much
+// in-memory buffering.  This means we generally write sections in
+// dependency order (e.g. .dynamic points to .dynsym which points to .text).
+//
+// In the cases where we need to buffer, we write the larger section first
+// and buffer the smaller one (e.g. .strtab is bigger than .symtab).
+//
+// The debug sections are written last for easier stripping.
+//
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
+  static constexpr size_t kMaxProgramHeaders = 16;
   using Elf_Addr = typename ElfTypes::Addr;
   using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
@@ -53,776 +84,420 @@
   using Elf_Dyn = typename ElfTypes::Dyn;
 
   // Base class of all sections.
-  class Section {
+  class Section : public OutputStream {
    public:
-    Section(const std::string& name, Elf_Word type, Elf_Word flags,
-            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
-        : header_(), section_index_(0), name_(name), link_(link) {
+    Section(ElfBuilder<ElfTypes>* owner, const std::string& name,
+            Elf_Word type, Elf_Word flags, const Section* link,
+            Elf_Word info, Elf_Word align, Elf_Word entsize)
+        : OutputStream(name), owner_(owner), header_(),
+          section_index_(0), name_(name), link_(link),
+          started_(false), finished_(false), phdr_flags_(PF_R), phdr_type_(0) {
+      DCHECK_GE(align, 1u);
       header_.sh_type = type;
       header_.sh_flags = flags;
       header_.sh_info = info;
       header_.sh_addralign = align;
       header_.sh_entsize = entsize;
     }
-    virtual ~Section() {}
 
-    // Returns the size of the content of this section.  It is used to
-    // calculate file offsets of all sections before doing any writes.
-    virtual Elf_Word GetSize() const = 0;
-
-    // Write the content of this section to the given file.
-    // This must write exactly the number of bytes returned by GetSize().
-    // Offsets of all sections are known when this method is called.
-    virtual bool Write(File* elf_file) = 0;
-
-    Elf_Word GetLink() const {
-      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
+    virtual ~Section() {
+      if (started_) {
+        CHECK(finished_);
+      }
     }
 
-    const Elf_Shdr* GetHeader() const {
-      return &header_;
+    // Start writing of this section.
+    void Start() {
+      CHECK(!started_);
+      CHECK(!finished_);
+      started_ = true;
+      auto& sections = owner_->sections_;
+      // Check that the previous section is complete.
+      CHECK(sections.empty() || sections.back()->finished_);
+      // The first ELF section index is 1. Index 0 is reserved for NULL.
+      section_index_ = sections.size() + 1;
+      // Push this section on the list of written sections.
+      sections.push_back(this);
+      // Align file position.
+      if (header_.sh_type != SHT_NOBITS) {
+        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->Seek(header_.sh_offset, kSeekSet);
+      }
+      // Align virtual memory address.
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        header_.sh_addr = RoundUp(owner_->virtual_address_, header_.sh_addralign);
+        owner_->virtual_address_ = header_.sh_addr;
+      }
     }
 
-    Elf_Shdr* GetHeader() {
-      return &header_;
+    // Finish writing of this section.
+    void End() {
+      CHECK(started_);
+      CHECK(!finished_);
+      finished_ = true;
+      if (header_.sh_type == SHT_NOBITS) {
+        CHECK_GT(header_.sh_size, 0u);
+      } else {
+        // Use the current file position to determine section size.
+        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        CHECK_GE(file_offset, (off_t)header_.sh_offset);
+        header_.sh_size = file_offset - header_.sh_offset;
+      }
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        owner_->virtual_address_ += header_.sh_size;
+      }
+    }
+
+    // Get the location of this section in virtual memory.
+    Elf_Addr GetAddress() const {
+      CHECK(started_);
+      return header_.sh_addr;
+    }
+
+    // Returns the size of the content of this section.
+    Elf_Word GetSize() const {
+      CHECK(finished_);
+      return header_.sh_size;
+    }
+
+    // Set desired allocation size for .bss section.
+    void SetSize(Elf_Word size) {
+      CHECK_EQ(header_.sh_type, (Elf_Word)SHT_NOBITS);
+      header_.sh_size = size;
+    }
+
+    // This function always claims success in order to simplify calling code.
+    // Use builder's Good() to check the actual status.
+    bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+      CHECK(started_);
+      CHECK(!finished_);
+      owner_->WriteFully(buffer, byte_count);
+      return true;
+    }
+
+    // This function always claims success in order to simplify calling code.
+    // Use builder's Good() to check the actual status.
+    off_t Seek(off_t offset, Whence whence) OVERRIDE {
+      // Forward the seek as-is and trust the caller to use it reasonably.
+      return owner_->Seek(offset, whence);
     }
 
     Elf_Word GetSectionIndex() const {
+      DCHECK(started_);
       DCHECK_NE(section_index_, 0u);
       return section_index_;
     }
 
-    void SetSectionIndex(Elf_Word section_index) {
-      section_index_ = section_index;
-    }
-
-    const std::string& GetName() const {
-      return name_;
-    }
-
    private:
+    ElfBuilder<ElfTypes>* owner_;
     Elf_Shdr header_;
     Elf_Word section_index_;
     const std::string name_;
     const Section* const link_;
+    bool started_;
+    bool finished_;
+    Elf_Word phdr_flags_;
+    Elf_Word phdr_type_;
+
+    friend class ElfBuilder;
 
     DISALLOW_COPY_AND_ASSIGN(Section);
   };
 
-  // Writer of .dynamic section.
-  class DynamicSection FINAL : public Section {
-   public:
-    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
-      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
-      dynamics_.push_back({tag, value, section});
-    }
-
-    DynamicSection(const std::string& name, Section* link)
-        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
-                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Dyn> buffer;
-      buffer.reserve(dynamics_.size() + 1u);
-      for (const ElfDynamicState& it : dynamics_) {
-        if (it.section_ != nullptr) {
-          // We are adding an address relative to a section.
-          buffer.push_back(
-              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
-        } else {
-          buffer.push_back({it.tag_, {it.value_}});
-        }
-      }
-      buffer.push_back({DT_NULL, {0}});
-      return WriteArray(elf_file, buffer.data(), buffer.size());
-    }
-
-   private:
-    struct ElfDynamicState {
-      Elf_Sword tag_;
-      Elf_Word value_;
-      const Section* section_;
-    };
-    std::vector<ElfDynamicState> dynamics_;
-  };
-
-  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
-                           Elf_Addr buffer_address,
-                           Elf_Addr base_address,
-                           std::vector<uint8_t>* buffer);
-
-  // Section with content based on simple memory buffer.
-  // The buffer can be optionally patched before writing.
-  class RawSection FINAL : public Section {
-   public:
-    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
-               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
-        : Section(name, type, flags, link, info, align, entsize),
-          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
-    }
-
-    RawSection(const std::string& name, Elf_Word type)
-        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      if (!patch_locations_.empty()) {
-        DCHECK(!patched_);  // Do not patch twice.
-        DCHECK(patch_ != nullptr);
-        DCHECK(patch_base_section_ != nullptr);
-        patch_(patch_locations_,
-               this->GetHeader()->sh_addr,
-               patch_base_section_->GetHeader()->sh_addr,
-               &buffer_);
-        patched_ = true;
-      }
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
-    bool IsEmpty() const {
-      return buffer_.size() == 0;
-    }
-
-    std::vector<uint8_t>* GetBuffer() {
-      return &buffer_;
-    }
-
-    void SetBuffer(const std::vector<uint8_t>& buffer) {
-      buffer_ = buffer;
-    }
-
-    std::vector<uintptr_t>* GetPatchLocations() {
-      return &patch_locations_;
-    }
-
-   private:
-    std::vector<uint8_t> buffer_;
-    std::vector<uintptr_t> patch_locations_;
-    bool patched_;
-    // User-provided function to do the actual patching.
-    PatchFn patch_;
-    // The section that we patch against (usually .text).
-    const Section* patch_base_section_;
-  };
-
-  // Writer of .rodata section or .text section.
-  // The write is done lazily using the provided CodeOutput.
-  class OatSection FINAL : public Section {
-   public:
-    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align,
-               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
-        : Section(name, type, flags, link, info, align, entsize),
-          size_(size), code_output_(code_output) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      // The BufferedOutputStream class contains the buffer as field,
-      // therefore it is too big to allocate on the stack.
-      std::unique_ptr<BufferedOutputStream> output_stream(
-          new BufferedOutputStream(new FileOutputStream(elf_file)));
-      return code_output_->Write(output_stream.get());
-    }
-
-   private:
-    Elf_Word size_;
-    CodeOutput* code_output_;
-  };
-
-  // Writer of .bss section.
-  class NoBitsSection FINAL : public Section {
-   public:
-    NoBitsSection(const std::string& name, Elf_Word size)
-        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-          size_(size) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-      LOG(ERROR) << "This section should not be written to the ELF file";
-      return false;
-    }
-
-   private:
-    Elf_Word size_;
-  };
-
   // Writer of .dynstr .strtab and .shstrtab sections.
-  class StrtabSection FINAL : public Section {
+  class StringSection FINAL : public Section {
    public:
-    StrtabSection(const std::string& name, Elf_Word flags)
-        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
-      buffer_.reserve(4 * KB);
-      // The first entry of strtab must be empty string.
-      buffer_ += '\0';
+    StringSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word flags, Elf_Word align)
+        : Section(owner, name, SHT_STRTAB, flags, nullptr, 0, align, 0),
+          current_offset_(0) {
     }
 
-    Elf_Word AddName(const std::string& name) {
-      Elf_Word offset = buffer_.size();
-      buffer_ += name;
-      buffer_ += '\0';
+    Elf_Word Write(const std::string& name) {
+      if (current_offset_ == 0) {
+        DCHECK(name.empty());
+      }
+      Elf_Word offset = current_offset_;
+      this->WriteFully(name.c_str(), name.length() + 1);
+      current_offset_ += name.length() + 1;
       return offset;
     }
 
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
    private:
-    std::string buffer_;
+    Elf_Word current_offset_;
   };
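StringSection now streams each name straight to the output and only tracks the running offset (note that the first entry written must be the empty string, as the DCHECK enforces). A toy model of the offset bookkeeping, with a std::string standing in for the file:

    #include <cstdint>
    #include <iostream>
    #include <string>

    struct StringTable {  // Toy .dynstr/.strtab model, not ART code.
      std::string data;
      uint32_t Write(const std::string& name) {
        uint32_t offset = static_cast<uint32_t>(data.size());
        data += name;
        data += '\0';  // Names are NUL-terminated in the table.
        return offset;
      }
    };

    int main() {
      StringTable strtab;
      strtab.Write("");                      // Offset 0: mandatory empty entry.
      uint32_t a = strtab.Write("oatdata");  // Offset 1.
      uint32_t b = strtab.Write("oatexec");  // Offset 9.
      std::cout << a << " " << b << "\n";    // Prints: 1 9
    }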
 
-  class HashSection;
-
   // Writer of .dynsym and .symtab sections.
-  class SymtabSection FINAL : public Section {
+  class SymbolSection FINAL : public Section {
    public:
-    // Add a symbol with given name to this symtab. The symbol refers to
-    // 'relative_addr' within the given section and has the given attributes.
-    void AddSymbol(const std::string& name, const Section* section,
-                   Elf_Addr addr, bool is_relative, Elf_Word size,
-                   uint8_t binding, uint8_t type, uint8_t other = 0) {
+    SymbolSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word type, Elf_Word flags, StringSection* strtab)
+        : Section(owner, name, type, flags, strtab, 0,
+                  sizeof(Elf_Off), sizeof(Elf_Sym)) {
+    }
+
+    // Buffer a symbol for this section; it will be written later.
+    void Add(Elf_Word name, const Section* section,
+             Elf_Addr addr, bool is_relative, Elf_Word size,
+             uint8_t binding, uint8_t type, uint8_t other = 0) {
       CHECK(section != nullptr);
-      Elf_Word name_idx = strtab_->AddName(name);
-      symbols_.push_back({ name, section, addr, size, is_relative,
-                           MakeStInfo(binding, type), other, name_idx });
+      Elf_Sym sym = Elf_Sym();
+      sym.st_name = name;
+      sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
+      sym.st_size = size;
+      sym.st_other = other;
+      sym.st_shndx = section->GetSectionIndex();
+      sym.st_info = (binding << 4) + (type & 0xf);
+      symbols_.push_back(sym);
     }
 
-    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
-                  StrtabSection* strtab)
-        : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
-          strtab_(strtab) {
-    }
-
-    bool IsEmpty() const {
-      return symbols_.empty();
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Sym> buffer;
-      buffer.reserve(1u + symbols_.size());
-      buffer.push_back(Elf_Sym());  // NULL.
-      for (const ElfSymbolState& it : symbols_) {
-        Elf_Sym sym = Elf_Sym();
-        sym.st_name = it.name_idx_;
-        if (it.is_relative_) {
-          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
-        } else {
-          sym.st_value = it.addr_;
-        }
-        sym.st_size = it.size_;
-        sym.st_other = it.other_;
-        sym.st_shndx = it.section_->GetSectionIndex();
-        sym.st_info = it.info_;
-        buffer.push_back(sym);
-      }
-      return WriteArray(elf_file, buffer.data(), buffer.size());
+    void Write() {
+      // The symbol table always has to start with NULL symbol.
+      Elf_Sym null_symbol = Elf_Sym();
+      this->WriteFully(&null_symbol, sizeof(null_symbol));
+      this->WriteFully(symbols_.data(), symbols_.size() * sizeof(symbols_[0]));
+      symbols_.clear();
+      symbols_.shrink_to_fit();
     }
 
    private:
-    struct ElfSymbolState {
-      const std::string name_;
-      const Section* section_;
-      Elf_Addr addr_;
-      Elf_Word size_;
-      bool is_relative_;
-      uint8_t info_;
-      uint8_t other_;
-      Elf_Word name_idx_;  // index in the strtab.
-    };
-
-    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-      return ((binding) << 4) + ((type) & 0xf);
-    }
-
-    // The symbols in the same order they will be in the symbol table.
-    std::vector<ElfSymbolState> symbols_;
-    StrtabSection* strtab_;
-
-    friend class HashSection;
+    std::vector<Elf_Sym> symbols_;
   };
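SymbolSection::Add() packs the binding and type into st_info with `(binding << 4) + (type & 0xf)`, which is the standard ELF encoding. A quick standalone check (constants spelled out instead of pulled from elf.h):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t STB_GLOBAL = 1;  // Binding: globally visible symbol.
      const uint8_t STT_FUNC = 2;    // Type: function/code.
      uint8_t st_info = (STB_GLOBAL << 4) + (STT_FUNC & 0xf);
      assert(st_info == 0x12);
      // Readers recover the halves with the inverse shifts/masks.
      assert((st_info >> 4) == STB_GLOBAL);
      assert((st_info & 0xf) == STT_FUNC);
      return 0;
    }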
 
-  // TODO: Consider removing.
-  // We use it only for the dynsym section which has only 5 symbols.
-  // We do not use it for symtab, and we probably do not have to
-  // since we use those symbols only to print backtraces.
-  class HashSection FINAL : public Section {
-   public:
-    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
-        : Section(name, SHT_HASH, flags, symtab,
-                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
-          symtab_(symtab) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      Elf_Word nbuckets = GetNumBuckets();
-      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
-      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
-    }
-
-    bool Write(File* const elf_file) OVERRIDE {
-      // Here is how The ELF hash table works.
-      // There are 3 arrays to worry about.
-      // * The symbol table where the symbol information is.
-      // * The bucket array which is an array of indexes into the symtab and chain.
-      // * The chain array which is also an array of indexes into the symtab and chain.
-      //
-      // Lets say the state is something like this.
-      // +--------+       +--------+      +-----------+
-      // | symtab |       | bucket |      |   chain   |
-      // |  null  |       | 1      |      | STN_UNDEF |
-      // | <sym1> |       | 4      |      | 2         |
-      // | <sym2> |       |        |      | 5         |
-      // | <sym3> |       |        |      | STN_UNDEF |
-      // | <sym4> |       |        |      | 3         |
-      // | <sym5> |       |        |      | STN_UNDEF |
-      // +--------+       +--------+      +-----------+
-      //
-      // The lookup process (in python psudocode) is
-      //
-      // def GetSym(name):
-      //     # NB STN_UNDEF == 0
-      //     indx = bucket[elfhash(name) % num_buckets]
-      //     while indx != STN_UNDEF:
-      //         if GetSymbolName(symtab[indx]) == name:
-      //             return symtab[indx]
-      //         indx = chain[indx]
-      //     return SYMBOL_NOT_FOUND
-      //
-      // Between bucket and chain arrays every symtab index must be present exactly
-      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
-      const auto& symbols = symtab_->symbols_;
-      // Select number of buckets.
-      // This is essentially arbitrary.
-      Elf_Word nbuckets = GetNumBuckets();
-      // 1 is for the implicit NULL symbol.
-      Elf_Word chain_size = (symbols.size() + 1);
-      std::vector<Elf_Word> hash;
-      hash.push_back(nbuckets);
-      hash.push_back(chain_size);
-      uint32_t bucket_offset = hash.size();
-      uint32_t chain_offset = bucket_offset + nbuckets;
-      hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-      Elf_Word* buckets = hash.data() + bucket_offset;
-      Elf_Word* chain   = hash.data() + chain_offset;
-
-      // Set up the actual hash table.
-      for (Elf_Word i = 0; i < symbols.size(); i++) {
-        // Add 1 since we need to have the null symbol that is not in the symbols
-        // list.
-        Elf_Word index = i + 1;
-        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
-        if (buckets[hash_val] == 0) {
-          buckets[hash_val] = index;
-        } else {
-          hash_val = buckets[hash_val];
-          CHECK_LT(hash_val, chain_size);
-          while (chain[hash_val] != 0) {
-            hash_val = chain[hash_val];
-            CHECK_LT(hash_val, chain_size);
-          }
-          chain[hash_val] = index;
-          // Check for loops. Works because if this is non-empty then there must be
-          // another cell which already contains the same symbol index as this one,
-          // which means some symbol has more then one name, which isn't allowed.
-          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-        }
-      }
-      return WriteArray(elf_file, hash.data(), hash.size());
-    }
-
-   private:
-    Elf_Word GetNumBuckets() const {
-      const auto& symbols = symtab_->symbols_;
-      if (symbols.size() < 8) {
-        return 2;
-      } else if (symbols.size() < 32) {
-        return 4;
-      } else if (symbols.size() < 256) {
-        return 16;
-      } else {
-        // Have about 32 ids per bucket.
-        return RoundUp(symbols.size()/32, 2);
-      }
-    }
-
-    // from bionic
-    static inline unsigned elfhash(const char *_name) {
-      const unsigned char *name = (const unsigned char *) _name;
-      unsigned h = 0, g;
-
-      while (*name) {
-        h = (h << 4) + *name++;
-        g = h & 0xf0000000;
-        h ^= g;
-        h ^= g >> 24;
-      }
-      return h;
-    }
-
-    SymtabSection* symtab_;
-
-    DISALLOW_COPY_AND_ASSIGN(HashSection);
-  };
-
-  ElfBuilder(InstructionSet isa,
-             Elf_Word rodata_size, CodeOutput* rodata_writer,
-             Elf_Word text_size, CodeOutput* text_writer,
-             Elf_Word bss_size)
+  ElfBuilder(InstructionSet isa, OutputStream* output)
     : isa_(isa),
-      dynstr_(".dynstr", SHF_ALLOC),
-      dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(".hash", SHF_ALLOC, &dynsym_),
-      rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
-              nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
-      text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
-            nullptr, 0, kPageSize, 0, text_size, text_writer),
-      bss_(".bss", bss_size),
-      dynamic_(".dynamic", &dynstr_),
-      strtab_(".strtab", 0),
-      symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
-      shstrtab_(".shstrtab", 0) {
+      output_(output),
+      output_good_(true),
+      output_offset_(0),
+      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+      strtab_(this, ".strtab", 0, kPageSize),
+      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+      shstrtab_(this, ".shstrtab", 0, 1),
+      virtual_address_(0) {
+    text_.phdr_flags_ = PF_R | PF_X;
+    bss_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_type_ = PT_DYNAMIC;
+    eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
   }
   ~ElfBuilder() {}
 
-  OatSection* GetText() { return &text_; }
-  SymtabSection* GetSymtab() { return &symtab_; }
+  InstructionSet GetIsa() { return isa_; }
+  Section* GetRoData() { return &rodata_; }
+  Section* GetText() { return &text_; }
+  Section* GetBss() { return &bss_; }
+  StringSection* GetStrTab() { return &strtab_; }
+  SymbolSection* GetSymTab() { return &symtab_; }
+  Section* GetEhFrame() { return &eh_frame_; }
+  Section* GetEhFrameHdr() { return &eh_frame_hdr_; }
+  Section* GetDebugFrame() { return &debug_frame_; }
 
-  bool Write(File* elf_file) {
-    // Since the .text section of an oat file contains relative references to .rodata
-    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
-    // a non-traditional layout where the .bss section is mapped independently of the
-    // .dynamic section and needs its own program header with LOAD RW.
-    //
-    // The basic layout of the elf file. Order may be different in final output.
-    // +-------------------------+
-    // | Elf_Ehdr                |
-    // +-------------------------+
-    // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
-    // | Elf_Phdr LOAD R X       | .text
-    // | Elf_Phdr LOAD RW        | .bss (Optional)
-    // | Elf_Phdr LOAD RW        | .dynamic
-    // | Elf_Phdr DYNAMIC        | .dynamic
-    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
-    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
-    // +-------------------------+
-    // | .dynsym                 |
-    // | Elf_Sym  STN_UNDEF      |
-    // | Elf_Sym  oatdata        |
-    // | Elf_Sym  oatexec        |
-    // | Elf_Sym  oatlastword    |
-    // | Elf_Sym  oatbss         | (Optional)
-    // | Elf_Sym  oatbsslastword | (Optional)
-    // +-------------------------+
-    // | .dynstr                 |
-    // | names for .dynsym       |
-    // +-------------------------+
-    // | .hash                   |
-    // | hashtable for dynsym    |
-    // +-------------------------+
-    // | .rodata                 |
-    // | oatdata..oatexec-4      |
-    // +-------------------------+
-    // | .text                   |
-    // | oatexec..oatlastword    |
-    // +-------------------------+
-    // | .dynamic                |
-    // | Elf_Dyn DT_HASH         |
-    // | Elf_Dyn DT_STRTAB       |
-    // | Elf_Dyn DT_SYMTAB       |
-    // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRSZ        |
-    // | Elf_Dyn DT_SONAME       |
-    // | Elf_Dyn DT_NULL         |
-    // +-------------------------+  (Optional)
-    // | .symtab                 |  (Optional)
-    // | program symbols         |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .strtab                 |  (Optional)
-    // | names for .symtab       |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame               |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .eh_frame_hdr           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_info             |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_abbrev           |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_str              |  (Optional)
-    // +-------------------------+  (Optional)
-    // | .debug_line             |  (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | names of sections       |
-    // +-------------------------+
-    // | Elf_Shdr null           |
-    // | Elf_Shdr .dynsym        |
-    // | Elf_Shdr .dynstr        |
-    // | Elf_Shdr .hash          |
-    // | Elf_Shdr .rodata        |
-    // | Elf_Shdr .text          |
-    // | Elf_Shdr .bss           |  (Optional)
-    // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .symtab        |  (Optional)
-    // | Elf_Shdr .strtab        |  (Optional)
-    // | Elf_Shdr .eh_frame      |  (Optional)
-    // | Elf_Shdr .eh_frame_hdr  |  (Optional)
-    // | Elf_Shdr .debug_info    |  (Optional)
-    // | Elf_Shdr .debug_abbrev  |  (Optional)
-    // | Elf_Shdr .debug_str     |  (Optional)
-    // | Elf_Shdr .debug_line    |  (Optional)
-    // | Elf_Shdr .oat_patches   |  (Optional)
-    // | Elf_Shdr .shstrtab      |
-    // +-------------------------+
-    constexpr bool debug_logging_ = false;
+  // Encode patch locations as a LEB128 list of deltas between consecutive addresses.
+  // (exposed publicly for tests)
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer) {
+    buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
+    uintptr_t address = 0;  // relative to start of section.
+    for (uintptr_t location : locations) {
+      DCHECK_GE(location, address) << "Patch locations are not in sorted order";
+      EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
+      address = location;
+    }
+  }
 
-    // Create a list of all section which we want to write.
-    // This is the order in which they will be written.
-    std::vector<Section*> sections;
-    sections.push_back(&dynsym_);
-    sections.push_back(&dynstr_);
-    sections.push_back(&hash_);
-    sections.push_back(&rodata_);
-    sections.push_back(&text_);
-    if (bss_.GetSize() != 0u) {
-      sections.push_back(&bss_);
-    }
-    sections.push_back(&dynamic_);
-    if (!symtab_.IsEmpty()) {
-      sections.push_back(&symtab_);
-      sections.push_back(&strtab_);
-    }
-    for (Section* section : other_sections_) {
-      sections.push_back(section);
-    }
-    sections.push_back(&shstrtab_);
-    for (size_t i = 0; i < sections.size(); i++) {
-      // The first section index is 1.  Index 0 is reserved for NULL.
-      // Section index is used for relative symbols and for section links.
-      sections[i]->SetSectionIndex(i + 1);
-      // Add section name to .shstrtab.
-      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
-      sections[i]->GetHeader()->sh_name = name_offset;
-    }
+  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+    std::vector<uint8_t> buffer;
+    EncodeOatPatches(*patch_locations, &buffer);
+    std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer.data(), buffer.size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
 
-    // The running program does not have access to section headers
-    // and the loader is not supposed to use them either.
-    // The dynamic sections therefore replicates some of the layout
-    // information like the address and size of .rodata and .text.
-    // It also contains other metadata like the SONAME.
-    // The .dynamic section is found using the PT_DYNAMIC program header.
-    BuildDynsymSection();
-    BuildDynamicSection(elf_file->GetPath());
+  void WriteSection(const char* name, const std::vector<uint8_t>* buffer) {
+    std::unique_ptr<Section> s(new Section(this, name, SHT_PROGBITS, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer->data(), buffer->size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
 
-    // We do not know the number of headers until the final stages of write.
-    // It is easiest to just reserve a fixed amount of space for them.
-    constexpr size_t kMaxProgramHeaders = 8;
-    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
+  void Start() {
+    // Reserve space for ELF header and program headers.
+    // We do not know the number of headers until later, so
+    // it is easiest to just reserve a fixed amount of space.
+    int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+    Seek(size, kSeekSet);
+    virtual_address_ += size;
+  }
 
-    // Layout of all sections - determine the final file offsets and addresses.
-    // This must be done after we have built all sections and know their size.
-    Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Elf_Addr load_address = file_offset;
-    std::vector<Elf_Shdr> section_headers;
-    section_headers.reserve(1u + sections.size());
-    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
-    for (auto* section : sections) {
-      Elf_Shdr* header = section->GetHeader();
-      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
-      header->sh_size = section->GetSize();
-      header->sh_link = section->GetLink();
-      // Allocate memory for the section in the file.
-      if (header->sh_type != SHT_NOBITS) {
-        header->sh_offset = RoundUp(file_offset, alignment);
-        file_offset = header->sh_offset + header->sh_size;
-      }
-      // Allocate memory for the section during program execution.
-      if ((header->sh_flags & SHF_ALLOC) != 0) {
-        header->sh_addr = RoundUp(load_address, alignment);
-        load_address = header->sh_addr + header->sh_size;
-      }
-      if (debug_logging_) {
-        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
-                  << " offset=0x" << header->sh_offset
-                  << " addr=0x" << header->sh_addr
-                  << " size=0x" << header->sh_size;
-      }
-      // Collect section headers into continuous array for convenience.
-      section_headers.push_back(*header);
-    }
-    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
-
-    // Create program headers now that we know the layout of the whole file.
-    // Each segment contains one or more sections which are mapped together.
-    // Not all sections are mapped during the execution of the program.
-    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
-    // interesting parts of memory and their addresses overlap with PT_LOAD.
-    std::vector<Elf_Phdr> program_headers;
-    program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
-    // Create the main LOAD R segment which spans all sections up to .rodata.
-    const Elf_Shdr* rodata = rodata_.GetHeader();
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
-      0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
-    if (bss_.GetHeader()->sh_size != 0u) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
-    }
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
-    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
-    const Section* eh_frame = FindSection(".eh_frame");
-    if (eh_frame != nullptr) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
-      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
-      if (eh_frame_hdr != nullptr) {
-        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
-        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
-        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
-                 eh_frame_hdr->GetHeader()->sh_offset);
-        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
-        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.back().p_memsz  += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
+  void End() {
+    // Write section names and finish the section headers.
+    shstrtab_.Start();
+    shstrtab_.Write("");
+    for (auto* section : sections_) {
+      section->header_.sh_name = shstrtab_.Write(section->name_);
+      if (section->link_ != nullptr) {
+        section->header_.sh_link = section->link_->GetSectionIndex();
       }
     }
-    DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
-    program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
-      kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
-    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
+    shstrtab_.End();
 
-    // Create the main ELF header.
+    // Write section headers at the end of the ELF file.
+    std::vector<Elf_Shdr> shdrs;
+    shdrs.reserve(1u + sections_.size());
+    shdrs.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections_) {
+      shdrs.push_back(section->header_);
+    }
+    Elf_Off section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    Seek(section_headers_offset, kSeekSet);
+    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Write the initial file headers.
+    std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
     Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = kProgramHeadersOffset;
+    elf_header.e_phoff = sizeof(Elf_Ehdr);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = program_headers.size();
-    elf_header.e_shnum = section_headers.size();
+    elf_header.e_phnum = phdrs.size();
+    elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-
-    // Write all headers and section content to the file.
-    // Depending on the implementations of Section::Write, this
-    // might be just memory copies or some more elaborate operations.
-    if (!WriteArray(elf_file, &elf_header, 1)) {
-      LOG(INFO) << "Failed to write the ELF header";
-      return false;
-    }
-    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
-      LOG(INFO) << "Failed to write the program headers";
-      return false;
-    }
-    for (Section* section : sections) {
-      const Elf_Shdr* header = section->GetHeader();
-      if (header->sh_type != SHT_NOBITS) {
-        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
-          LOG(INFO) << "Failed to write section " << section->GetName();
-          return false;
-        }
-        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
-        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
-          << "The number of bytes written does not match GetSize()";
-      }
-    }
-    if (!SeekTo(elf_file, section_headers_offset) ||
-        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
-      LOG(INFO) << "Failed to write the section headers";
-      return false;
-    }
-    return true;
+    Seek(0, kSeekSet);
+    WriteFully(&elf_header, sizeof(elf_header));
+    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
   }
 
-  // Adds the given section to the builder.  It does not take ownership.
-  void RegisterSection(Section* section) {
-    other_sections_.push_back(section);
+  // The running program does not have access to section headers
+  // and the loader is not supposed to use them either.
+  // The dynamic section therefore replicates some of the layout
+  // information like the address and size of .rodata and .text.
+  // It also contains other metadata like the SONAME.
+  // The .dynamic section is found using the PT_DYNAMIC program header.
+  void WriteDynamicSection(const std::string& elf_file_path) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
+    }
+
+    dynstr_.Start();
+    dynstr_.Write("");  // dynstr should start with an empty string.
+    dynsym_.Add(dynstr_.Write("oatdata"), &rodata_, 0, true,
+                rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    if (text_.GetSize() != 0u) {
+      dynsym_.Add(dynstr_.Write("oatexec"), &text_, 0, true,
+                  text_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatlastword"), &text_, text_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    } else if (rodata_.GetSize() != 0) {
+      // rodata_ can be size 0 for dwarf_test.
+      dynsym_.Add(dynstr_.Write("oatlastword"), &rodata_, rodata_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    if (bss_.finished_) {
+      dynsym_.Add(dynstr_.Write("oatbss"), &bss_,
+                  0, true, bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatbsslastword"), &bss_,
+                  bss_.GetSize() - 4, true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    Elf_Word soname_offset = dynstr_.Write(soname);
+    dynstr_.End();
+
+    dynsym_.Start();
+    dynsym_.Write();
+    dynsym_.End();
+
+    // We do not really need a hash table since there are so few entries.
+    // However, the hash table is the only way the linker can actually
+    // determine the number of symbols in .dynsym, so it is required.
+    hash_.Start();
+    int count = dynsym_.GetSize() / sizeof(Elf_Sym);  // Includes NULL.
+    std::vector<Elf_Word> hash;
+    hash.push_back(1);  // Number of buckets.
+    hash.push_back(count);  // Number of chains.
+    // Buckets.  Having just one makes every lookup a linear search.
+    hash.push_back(1);  // Point to the first non-NULL symbol.
+    // Chains.  This creates a linked list of symbols.
+    hash.push_back(0);  // Dummy entry for the NULL symbol.
+    for (int i = 1; i < count - 1; i++) {
+      hash.push_back(i + 1);  // Each symbol points to the next one.
+    }
+    hash.push_back(0);  // Last symbol terminates the chain.
+    hash_.WriteFully(hash.data(), hash.size() * sizeof(hash[0]));
+    hash_.End();
+
+    dynamic_.Start();
+    Elf_Dyn dyns[] = {
+      { DT_HASH, { hash_.GetAddress() } },
+      { DT_STRTAB, { dynstr_.GetAddress() } },
+      { DT_SYMTAB, { dynsym_.GetAddress() } },
+      { DT_SYMENT, { sizeof(Elf_Sym) } },
+      { DT_STRSZ, { dynstr_.GetSize() } },
+      { DT_SONAME, { soname_offset } },
+      { DT_NULL, { 0 } },
+    };
+    dynamic_.WriteFully(&dyns, sizeof(dyns));
+    dynamic_.End();
   }
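
To make the single-bucket .hash layout concrete, here is a hedged standalone sketch (not ART code) that reproduces the word sequence written above for a small .dynsym:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Layout: nbucket, nchain, bucket[nbucket], chain[nchain].
    std::vector<uint32_t> MakeSingleBucketHash(int count) {
      std::vector<uint32_t> hash;
      hash.push_back(1);        // One bucket.
      hash.push_back(count);    // nchain == number of .dynsym entries.
      hash.push_back(1);        // bucket[0] -> first real symbol.
      hash.push_back(0);        // chain[0] belongs to the NULL symbol.
      for (int i = 1; i < count - 1; i++) {
        hash.push_back(i + 1);  // chain[i] -> next symbol.
      }
      hash.push_back(0);        // Last symbol terminates the chain.
      return hash;
    }

    int main() {
      // Four entries (NULL, oatdata, oatexec, oatlastword) give
      // {1, 4, 1, 0, 2, 3, 0}; every lookup walks the chain 1 -> 2 -> 3.
      for (uint32_t word : MakeSingleBucketHash(4)) {
        printf("%u ", word);
      }
      printf("\n");
      return 0;
    }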
 
-  const Section* FindSection(const char* name) {
-    for (const auto* section : other_sections_) {
-      if (section->GetName() == name) {
-        return section;
-      }
-    }
-    return nullptr;
+  // Returns true if all writes and seeks on the output stream succeeded.
+  bool Good() {
+    return output_good_;
   }
 
  private:
-  static bool SeekTo(File* elf_file, Elf_Word offset) {
-    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
-      << "Seeking backwards";
-    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
-      return false;
-    }
-    return true;
-  }
-
-  template<typename T>
-  static bool WriteArray(File* elf_file, const T* data, size_t count) {
-    if (count != 0) {
-      DCHECK(data != nullptr);
-      if (!elf_file->WriteFully(data, count * sizeof(T))) {
-        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
-        return false;
+  // This function always succeeds, to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  void WriteFully(const void* buffer, size_t byte_count) {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to ELF file at offset " << output_offset_;
+        output_good_ = false;
       }
     }
-    return true;
+    output_offset_ += byte_count;
   }
 
-  // Helper - create segment header based on memory range.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type    = type;
-    phdr.p_flags   = flags;
-    phdr.p_offset  = offset;
-    phdr.p_vaddr   = offset;
-    phdr.p_paddr   = offset;
-    phdr.p_filesz  = size;
-    phdr.p_memsz   = size;
-    phdr.p_align   = align;
-    return phdr;
-  }
-
-  // Helper - create segment header based on section header.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    const Section& section) {
-    const Elf_Shdr* shdr = section.GetHeader();
-    // Only run-time allocated sections should be in segment headers.
-    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type   = type;
-    phdr.p_flags  = flags;
-    phdr.p_offset = shdr->sh_offset;
-    phdr.p_vaddr  = shdr->sh_addr;
-    phdr.p_paddr  = shdr->sh_addr;
-    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
-    phdr.p_memsz  = shdr->sh_size;
-    phdr.p_align  = shdr->sh_addralign;
-    return phdr;
+  // This function always succeeds, to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) {
+    // We keep a shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == (off_t)-1) {
+        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
   }
 
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
@@ -869,6 +544,10 @@
       }
       case kNone: {
         LOG(FATAL) << "No instruction set";
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unknown instruction set " << isa;
       }
     }
 
@@ -892,50 +571,110 @@
     return elf_header;
   }
 
-  void BuildDynamicSection(const std::string& elf_file_path) {
-    std::string soname(elf_file_path);
-    size_t directory_separator_pos = soname.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      soname = soname.substr(directory_separator_pos + 1);
+  // Create program headers based on written sections.
+  std::vector<Elf_Phdr> MakeProgramHeaders() {
+    CHECK(!sections_.empty());
+    std::vector<Elf_Phdr> phdrs;
+    {
+      // The program headers must start with PT_PHDR, which is used by the
+      // loaded process to determine the number of program headers.
+      Elf_Phdr phdr = Elf_Phdr();
+      phdr.p_type    = PT_PHDR;
+      phdr.p_flags   = PF_R;
+      phdr.p_offset  = phdr.p_vaddr = phdr.p_paddr = sizeof(Elf_Ehdr);
+      phdr.p_filesz  = phdr.p_memsz = 0;  // We need to fill this later.
+      phdr.p_align   = sizeof(Elf_Off);
+      phdrs.push_back(phdr);
+      // Tell the linker to mmap the start of the file into memory.
+      Elf_Phdr load = Elf_Phdr();
+      load.p_type    = PT_LOAD;
+      load.p_flags   = PF_R;
+      load.p_offset  = load.p_vaddr = load.p_paddr = 0;
+      load.p_filesz  = load.p_memsz = sections_[0]->header_.sh_offset;
+      load.p_align   = kPageSize;
+      phdrs.push_back(load);
     }
-    // NB: We must add the name before adding DT_STRSZ.
-    Elf_Word soname_offset = dynstr_.AddName(soname);
-
-    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
-    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
-    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
-    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
-    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
-    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
-  }
-
-  void BuildDynsymSection() {
-    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
-                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatexec", &text_, 0, true,
-                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
-                      true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_.GetSize() != 0u) {
-      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
-                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
-                        true, 4, STB_GLOBAL, STT_OBJECT);
+    // Create program headers for sections.
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // PT_LOAD tells the linker to mmap part of the file.
+        // The linker can only mmap page-aligned sections.
+        // A single PT_LOAD may contain several ELF sections.
+        Elf_Phdr& prev = phdrs.back();
+        Elf_Phdr load = Elf_Phdr();
+        load.p_type   = PT_LOAD;
+        load.p_flags  = section->phdr_flags_;
+        load.p_offset = shdr.sh_offset;
+        load.p_vaddr  = load.p_paddr = shdr.sh_addr;
+        load.p_filesz = (shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u);
+        load.p_memsz  = shdr.sh_size;
+        load.p_align  = shdr.sh_addralign;
+        if (prev.p_type == load.p_type &&
+            prev.p_flags == load.p_flags &&
+            prev.p_filesz == prev.p_memsz &&  // Do not merge .bss
+            load.p_filesz == load.p_memsz) {  // Do not merge .bss
+          // Merge this PT_LOAD with the previous one.
+          Elf_Word size = shdr.sh_offset + shdr.sh_size - prev.p_offset;
+          prev.p_filesz = size;
+          prev.p_memsz  = size;
+        } else {
+          // If we are adding a new load segment, it must be page-aligned.
+          CHECK_EQ(shdr.sh_addralign, (Elf_Word)kPageSize);
+          phdrs.push_back(load);
+        }
+      }
     }
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // Other PT_* types allow the program to locate interesting
+        // parts of memory at runtime. They must overlap with PT_LOAD.
+        if (section->phdr_type_ != 0) {
+          Elf_Phdr phdr = Elf_Phdr();
+          phdr.p_type   = section->phdr_type_;
+          phdr.p_flags  = section->phdr_flags_;
+          phdr.p_offset = shdr.sh_offset;
+          phdr.p_vaddr  = phdr.p_paddr = shdr.sh_addr;
+          phdr.p_filesz = phdr.p_memsz = shdr.sh_size;
+          phdr.p_align  = shdr.sh_addralign;
+          phdrs.push_back(phdr);
+        }
+      }
+    }
+    // Set the size of the initial PT_PHDR.
+    CHECK_EQ(phdrs[0].p_type, (Elf_Word)PT_PHDR);
+    phdrs[0].p_filesz = phdrs[0].p_memsz = phdrs.size() * sizeof(Elf_Phdr);
+
+    return phdrs;
   }
 
   InstructionSet isa_;
-  StrtabSection dynstr_;
-  SymtabSection dynsym_;
-  HashSection hash_;
-  OatSection rodata_;
-  OatSection text_;
-  NoBitsSection bss_;
-  DynamicSection dynamic_;
-  StrtabSection strtab_;
-  SymtabSection symtab_;
-  std::vector<Section*> other_sections_;
-  StrtabSection shstrtab_;
+
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+
+  Section rodata_;
+  Section text_;
+  Section bss_;
+  StringSection dynstr_;
+  SymbolSection dynsym_;
+  Section hash_;
+  Section dynamic_;
+  Section eh_frame_;
+  Section eh_frame_hdr_;
+  StringSection strtab_;
+  SymbolSection symtab_;
+  Section debug_frame_;
+  StringSection shstrtab_;
+  std::vector<std::unique_ptr<Section>> other_sections_;
+
+  // List of used sections in the order in which they were written.
+  std::vector<Section*> sections_;
+
+  // Used for allocation of virtual address space.
+  Elf_Addr virtual_address_;
 
   DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
 };
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 3a9e312..90db7eb 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -24,16 +24,16 @@
 #include "dex_file-inl.h"
 #include "dwarf/headers.h"
 #include "dwarf/register.h"
+#include "elf_builder.h"
 #include "oat_writer.h"
 #include "utils.h"
 
 namespace art {
 namespace dwarf {
 
-static void WriteDebugFrameCIE(InstructionSet isa,
-                               ExceptionHeaderValueApplication addr_type,
-                               CFIFormat format,
-                               std::vector<uint8_t>* eh_frame) {
+static void WriteCIE(InstructionSet isa,
+                     CFIFormat format,
+                     std::vector<uint8_t>* buffer) {
   // Scratch registers should be marked as undefined.  This tells the
   // debugger that its value in the previous frame is not recoverable.
   bool is64bit = Is64BitInstructionSet(isa);
@@ -59,8 +59,7 @@
         }
       }
       auto return_reg = Reg::ArmCore(14);  // R14(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kArm64: {
@@ -83,8 +82,7 @@
         }
       }
       auto return_reg = Reg::Arm64Core(30);  // R30(LR).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kMips:
@@ -100,8 +98,7 @@
         }
       }
       auto return_reg = Reg::MipsCore(31);  // R31(RA).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kX86: {
@@ -127,8 +124,7 @@
         }
       }
       auto return_reg = Reg::X86Core(8);  // R8(EIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kX86_64: {
@@ -154,8 +150,7 @@
         }
       }
       auto return_reg = Reg::X86_64Core(16);  // R16(RIP).
-      WriteDebugFrameCIE(is64bit, addr_type, return_reg,
-                         opcodes, format, eh_frame);
+      WriteCIE(is64bit, return_reg, opcodes, format, buffer);
       return;
     }
     case kNone:
@@ -165,36 +160,69 @@
   UNREACHABLE();
 }
 
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches) {
-  const auto& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
+template<typename ElfTypes>
+void WriteCFISection(ElfBuilder<ElfTypes>* builder,
+                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     CFIFormat format) {
+  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
+        format == dwarf::DW_EH_FRAME_FORMAT);
+  typedef typename ElfTypes::Addr Elf_Addr;
+
+  std::vector<uint32_t> binary_search_table;
+  std::vector<uintptr_t> patch_locations;
+  if (format == DW_EH_FRAME_FORMAT) {
+    binary_search_table.reserve(2 * method_infos.size());
+  } else {
+    patch_locations.reserve(method_infos.size());
+  }
 
   // Write .eh_frame/.debug_frame section.
-  std::map<uint32_t, size_t> address_to_fde_offset_map;
-  size_t cie_offset = debug_frame->size();
-  WriteDebugFrameCIE(isa, address_type, format, debug_frame);
-  for (const OatWriter::DebugInfo& mi : method_infos) {
-    if (!mi.deduped_) {  // Only one FDE per unique address.
-      ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
-      if (!opcodes.empty()) {
-        address_to_fde_offset_map.emplace(mi.low_pc_, debug_frame->size());
-        WriteDebugFrameFDE(Is64BitInstructionSet(isa), cie_offset,
-                           mi.low_pc_, mi.high_pc_ - mi.low_pc_,
-                           opcodes, format, debug_frame, debug_frame_patches);
+  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+                       ? builder->GetDebugFrame()
+                       : builder->GetEhFrame());
+  {
+    cfi_section->Start();
+    const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr cfi_address = cfi_section->GetAddress();
+    const Elf_Addr cie_address = cfi_address;
+    Elf_Addr buffer_address = cfi_address;
+    std::vector<uint8_t> buffer;  // Small temporary buffer.
+    WriteCIE(builder->GetIsa(), format, &buffer);
+    cfi_section->WriteFully(buffer.data(), buffer.size());
+    buffer_address += buffer.size();
+    buffer.clear();
+    for (const OatWriter::DebugInfo& mi : method_infos) {
+      if (!mi.deduped_) {  // Only one FDE per unique address.
+        ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
+        if (!opcodes.empty()) {
+          const Elf_Addr code_address = text_address + mi.low_pc_;
+          if (format == DW_EH_FRAME_FORMAT) {
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(code_address));
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(buffer_address));
+          }
+          WriteFDE(is64bit, cfi_address, cie_address,
+                   code_address, mi.high_pc_ - mi.low_pc_,
+                   opcodes, format, buffer_address, &buffer,
+                   &patch_locations);
+          cfi_section->WriteFully(buffer.data(), buffer.size());
+          buffer_address += buffer.size();
+          buffer.clear();
+        }
       }
     }
+    cfi_section->End();
   }
 
   if (format == DW_EH_FRAME_FORMAT) {
+    auto* header_section = builder->GetEhFrameHdr();
+    header_section->Start();
+    uint32_t header_address = dchecked_integral_cast<uint32_t>(header_section->GetAddress());
     // Write .eh_frame_hdr section.
-    Writer<> header(eh_frame_hdr);
+    std::vector<uint8_t> buffer;
+    Writer<> header(&buffer);
     header.PushUint8(1);  // Version.
     // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
     // so we have to use pcrel which means relative to the pointer's location.
@@ -204,47 +232,29 @@
     // Encoding of binary search table addresses - libunwind supports only this
     // specific combination, which means relative to the start of .eh_frame_hdr.
     header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);
-    // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section
-    const int32_t relative_eh_frame_begin = -static_cast<int32_t>(debug_frame->size());
-    header.PushInt32(relative_eh_frame_begin - 4U);
+    // .eh_frame pointer
+    header.PushInt32(cfi_section->GetAddress() - (header_address + 4u));
     // Binary search table size (number of entries).
-    header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size()));
+    header.PushUint32(dchecked_integral_cast<uint32_t>(binary_search_table.size()/2));
+    header_section->WriteFully(buffer.data(), buffer.size());
     // Binary search table.
-    for (const auto& address_to_fde_offset : address_to_fde_offset_map) {
-      u_int32_t code_address = address_to_fde_offset.first;
-      int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second);
-      eh_frame_hdr_patches->push_back(header.data()->size());
-      header.PushUint32(code_address);
-      // We know the exact layout (eh_frame is immediately before eh_frame_hdr)
-      // and the data is relative to the start of the eh_frame_hdr,
-      // so patching isn't necessary (in contrast to the code address above).
-      header.PushInt32(relative_eh_frame_begin + fde_address);
+    for (size_t i = 0; i < binary_search_table.size(); i++) {
+      // Make addresses section-relative since we know the header address now.
+      binary_search_table[i] -= header_address;
     }
+    header_section->WriteFully(binary_search_table.data(),
+                               binary_search_table.size() * sizeof(binary_search_table[0]));
+    header_section->End();
+  } else {
+    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
   }
 }
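
For reference, the .eh_frame_hdr prefix written above is four bytes (version plus three encoding bytes) followed by the pcrel .eh_frame pointer, which is why the pointer is biased by "(header_address + 4u)"; the binary search table entries are then rewritten to be datarel, i.e. relative to the header itself. A small sketch with the standard DWARF encoding constants (values per the exception-handling ABI; the FDE count encoding and the helper name are assumptions):

    #include <cstdint>

    // Standard DWARF EH pointer-encoding constants.
    constexpr uint8_t DW_EH_PE_udata4  = 0x03;
    constexpr uint8_t DW_EH_PE_sdata4  = 0x0b;
    constexpr uint8_t DW_EH_PE_pcrel   = 0x10;
    constexpr uint8_t DW_EH_PE_datarel = 0x30;

    // A pcrel pointer is relative to its own location, which is four bytes
    // into the header; hence the "(header_address + 4u)" bias above.
    int32_t EhFramePointerField(uint32_t eh_frame_address, uint32_t eh_frame_hdr_address) {
      return static_cast<int32_t>(eh_frame_address - (eh_frame_hdr_address + 4u));
    }

    int main() {
      const uint8_t prefix[4] = {
          1,                                   // Version.
          DW_EH_PE_pcrel | DW_EH_PE_sdata4,    // .eh_frame pointer encoding.
          DW_EH_PE_udata4,                     // FDE count encoding (assumed).
          DW_EH_PE_datarel | DW_EH_PE_sdata4,  // Search table encoding.
      };
      (void)prefix;
      // .eh_frame at 0x5000 and .eh_frame_hdr at 0x6000 give -0x1004.
      return EhFramePointerField(0x5000, 0x6000) == -0x1004 ? 0 : 1;
    }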
 
-/*
- * @brief Generate the DWARF sections.
- * @param oat_writer The Oat file Writer.
- * @param eh_frame Call Frame Information.
- * @param debug_info Compilation unit information.
- * @param debug_info_patches Address locations to be patched.
- * @param debug_abbrev Abbreviations used to generate dbg_info.
- * @param debug_str Debug strings.
- * @param debug_line Line number table.
- * @param debug_line_patches Address locations to be patched.
- */
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches) {
-  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
-  const bool is64bit = Is64BitInstructionSet(isa);
+template<typename ElfTypes>
+void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                        const std::vector<OatWriter::DebugInfo>& method_infos) {
+  typedef typename ElfTypes::Addr Elf_Addr;
+  const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+  Elf_Addr text_address = builder->GetText()->GetAddress();
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of method is not marked deduped_, but the rest is.
@@ -273,6 +283,12 @@
   }
 
   // Write .debug_info section.
+  std::vector<uint8_t> debug_info;
+  std::vector<uintptr_t> debug_info_patches;
+  std::vector<uint8_t> debug_abbrev;
+  std::vector<uint8_t> debug_str;
+  std::vector<uint8_t> debug_line;
+  std::vector<uintptr_t> debug_line_patches;
   for (const auto& compilation_unit : compilation_units) {
     uint32_t cunit_low_pc = 0xFFFFFFFFU;
     uint32_t cunit_high_pc = 0;
@@ -281,14 +297,14 @@
       cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
     }
 
-    size_t debug_abbrev_offset = debug_abbrev->size();
-    DebugInfoEntryWriter<> info(is64bit, debug_abbrev);
+    size_t debug_abbrev_offset = debug_abbrev.size();
+    DebugInfoEntryWriter<> info(is64bit, &debug_abbrev);
     info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
+    info.WriteStrp(DW_AT_producer, "Android dex2oat", &debug_str);
     info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line->size());
+    info.WriteAddr(DW_AT_low_pc, text_address + cunit_low_pc);
+    info.WriteAddr(DW_AT_high_pc, text_address + cunit_high_pc);
+    info.WriteData4(DW_AT_stmt_list, debug_line.size());
     for (auto method_info : compilation_unit) {
       std::string method_name = PrettyMethod(method_info->dex_method_index_,
                                              *method_info->dex_file_, true);
@@ -296,13 +312,13 @@
         method_name += " [DEDUPED]";
       }
       info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
-      info.WriteAddr(DW_AT_low_pc, method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, method_info->high_pc_);
+      info.WriteStrp(DW_AT_name, method_name.data(), &debug_str);
+      info.WriteAddr(DW_AT_low_pc, text_address + method_info->low_pc_);
+      info.WriteAddr(DW_AT_high_pc, text_address + method_info->high_pc_);
       info.EndTag();  // DW_TAG_subprogram
     }
     info.EndTag();  // DW_TAG_compile_unit
-    WriteDebugInfoCU(debug_abbrev_offset, info, debug_info, debug_info_patches);
+    WriteDebugInfoCU(debug_abbrev_offset, info, &debug_info, &debug_info_patches);
 
     // Write .debug_line section.
     std::vector<FileEntry> files;
@@ -311,7 +327,7 @@
     std::unordered_map<std::string, size_t> directories_map;
     int code_factor_bits_ = 0;
     int dwarf_isa = -1;
-    switch (isa) {
+    switch (builder->GetIsa()) {
       case kArm:  // arm actually means thumb2.
       case kThumb2:
         code_factor_bits_ = 1;  // 16-bit instructions
@@ -328,7 +344,7 @@
         break;
     }
     DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(cunit_low_pc);
+    opcodes.SetAddress(text_address + cunit_low_pc);
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
@@ -342,6 +358,8 @@
         DefaultSrcMap dex2line_;
       } debug_info_callbacks;
 
+      Elf_Addr method_address = text_address + mi->low_pc_;
+
       const DexFile* dex = mi->dex_file_;
       if (mi->code_item_ != nullptr) {
         dex->DecodeDebugInfo(mi->code_item_,
@@ -414,26 +432,48 @@
                 int first_line = dex2line_map.front().to_;
                 // Prologue is not a sensible place for a breakpoint.
                 opcodes.NegateStmt();
-                opcodes.AddRow(mi->low_pc_, first_line);
+                opcodes.AddRow(method_address, first_line);
                 opcodes.NegateStmt();
                 opcodes.SetPrologueEnd();
               }
-              opcodes.AddRow(mi->low_pc_ + pc, line);
+              opcodes.AddRow(method_address + pc, line);
             } else if (line != opcodes.CurrentLine()) {
-              opcodes.AddRow(mi->low_pc_ + pc, line);
+              opcodes.AddRow(method_address + pc, line);
             }
           }
         }
       } else {
         // line 0 - instruction cannot be attributed to any source line.
-        opcodes.AddRow(mi->low_pc_, 0);
+        opcodes.AddRow(method_address, 0);
       }
     }
-    opcodes.AdvancePC(cunit_high_pc);
+    opcodes.AdvancePC(text_address + cunit_high_pc);
     opcodes.EndSequence();
-    WriteDebugLineTable(directories, files, opcodes, debug_line, debug_line_patches);
+    WriteDebugLineTable(directories, files, opcodes, &debug_line, &debug_line_patches);
   }
+  builder->WriteSection(".debug_info", &debug_info);
+  builder->WritePatches(".debug_info.oat_patches", &debug_info_patches);
+  builder->WriteSection(".debug_abbrev", &debug_abbrev);
+  builder->WriteSection(".debug_str", &debug_str);
+  builder->WriteSection(".debug_line", &debug_line);
+  builder->WritePatches(".debug_line.oat_patches", &debug_line_patches);
 }
 
+// Explicit instantiations
+template void WriteCFISection<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const std::vector<OatWriter::DebugInfo>& method_infos,
+    CFIFormat format);
+template void WriteCFISection<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const std::vector<OatWriter::DebugInfo>& method_infos,
+    CFIFormat format);
+template void WriteDebugSections<ElfTypes32>(
+    ElfBuilder<ElfTypes32>* builder,
+    const std::vector<OatWriter::DebugInfo>& method_infos);
+template void WriteDebugSections<ElfTypes64>(
+    ElfBuilder<ElfTypes64>* builder,
+    const std::vector<OatWriter::DebugInfo>& method_infos);
+
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 69f7e0d..e58fd0a 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -19,29 +19,21 @@
 
 #include <vector>
 
 #include "dwarf/dwarf_constants.h"
+#include "elf_builder.h"
 #include "oat_writer.h"
 
 namespace art {
 namespace dwarf {
 
-void WriteCFISection(const CompilerDriver* compiler,
-                     const OatWriter* oat_writer,
-                     ExceptionHeaderValueApplication address_type,
-                     CFIFormat format,
-                     std::vector<uint8_t>* debug_frame,
-                     std::vector<uintptr_t>* debug_frame_patches,
-                     std::vector<uint8_t>* eh_frame_hdr,
-                     std::vector<uintptr_t>* eh_frame_hdr_patches);
+template<typename ElfTypes>
+void WriteCFISection(ElfBuilder<ElfTypes>* builder,
+                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     CFIFormat format);
 
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches);
+template<typename ElfTypes>
+void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                        const std::vector<OatWriter::DebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index dce1e86..5c059e1 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -70,190 +70,78 @@
 template <typename ElfTypes>
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
 
-// Encode patch locations as LEB128 list of deltas between consecutive addresses.
-template <typename ElfTypes>
-void ElfWriterQuick<ElfTypes>::EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                                                std::vector<uint8_t>* buffer) {
-  buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
-  uintptr_t address = 0;  // relative to start of section.
-  for (uintptr_t location : locations) {
-    DCHECK_GE(location, address) << "Patch locations are not in sorted order";
-    EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
-    address = location;
-  }
-}
-
-class RodataWriter FINAL : public CodeOutput {
- public:
-  explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
-
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteRodata(out);
-  }
-
- private:
-  OatWriter* oat_writer_;
-};
-
-class TextWriter FINAL : public CodeOutput {
- public:
-  explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
-
-  bool Write(OutputStream* out) OVERRIDE {
-    return oat_writer_->WriteCode(out);
-  }
-
- private:
-  OatWriter* oat_writer_;
-};
-
-enum PatchResult {
-  kAbsoluteAddress,  // Absolute memory location.
-  kPointerRelativeAddress,  // Offset relative to the location of the pointer.
-  kSectionRelativeAddress,  // Offset relative to start of containing section.
-};
-
-// Patch memory addresses within a buffer.
-// It assumes that the unpatched addresses are offsets relative to base_address.
-// (which generally means method's low_pc relative to the start of .text)
-template <typename Elf_Addr, typename Address, PatchResult kPatchResult>
-static void Patch(const std::vector<uintptr_t>& patch_locations,
-                  Elf_Addr buffer_address, Elf_Addr base_address,
-                  std::vector<uint8_t>* buffer) {
-  for (uintptr_t location : patch_locations) {
-    typedef __attribute__((__aligned__(1))) Address UnalignedAddress;
-    auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location);
-    switch (kPatchResult) {
-      case kAbsoluteAddress:
-        *to_patch = (base_address + *to_patch);
-        break;
-      case kPointerRelativeAddress:
-        *to_patch = (base_address + *to_patch) - (buffer_address + location);
-        break;
-      case kSectionRelativeAddress:
-        *to_patch = (base_address + *to_patch) - buffer_address;
-        break;
-    }
-  }
-}
-
 template <typename ElfTypes>
 bool ElfWriterQuick<ElfTypes>::Write(
     OatWriter* oat_writer,
     const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
     const std::string& android_root_unused ATTRIBUTE_UNUSED,
     bool is_host_unused ATTRIBUTE_UNUSED) {
-  using Elf_Addr = typename ElfTypes::Addr;
   const InstructionSet isa = compiler_driver_->GetInstructionSet();
+  std::unique_ptr<BufferedOutputStream> output_stream(
+      new BufferedOutputStream(new FileOutputStream(elf_file_)));
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(
+      new ElfBuilder<ElfTypes>(isa, output_stream.get()));
 
-  // Setup the builder with the main OAT sections (.rodata .text .bss).
-  const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-  const size_t text_size = oat_writer->GetSize() - rodata_size;
-  const size_t bss_size = oat_writer->GetBssSize();
-  RodataWriter rodata_writer(oat_writer);
-  TextWriter text_writer(oat_writer);
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(
-      isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size));
+  builder->Start();
 
-  // Add debug sections.
-  // They are allocated here (in the same scope as the builder),
-  // but they are registered with the builder only if they are used.
-  using RawSection = typename ElfBuilder<ElfTypes>::RawSection;
-  const auto* text = builder->GetText();
-  const bool is64bit = Is64BitInstructionSet(isa);
-  const int pointer_size = GetInstructionSetPointerSize(isa);
-  std::unique_ptr<RawSection> eh_frame(new RawSection(
-      ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> :
-                Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>,
-      text));
-  std::unique_ptr<RawSection> eh_frame_hdr(new RawSection(
-      ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0,
-      Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text));
-  std::unique_ptr<RawSection> debug_frame(new RawSection(
-      ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, pointer_size, 0,
-      is64bit ? Patch<Elf_Addr, uint64_t, kAbsoluteAddress> :
-                Patch<Elf_Addr, uint32_t, kAbsoluteAddress>,
-      text));
-  std::unique_ptr<RawSection> debug_frame_oat_patches(new RawSection(
-      ".debug_frame.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_info(new RawSection(
-      ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_info_oat_patches(new RawSection(
-      ".debug_info.oat_patches", SHT_OAT_PATCH));
-  std::unique_ptr<RawSection> debug_abbrev(new RawSection(
-      ".debug_abbrev", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_str(new RawSection(
-      ".debug_str", SHT_PROGBITS));
-  std::unique_ptr<RawSection> debug_line(new RawSection(
-      ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0,
-      Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text));
-  std::unique_ptr<RawSection> debug_line_oat_patches(new RawSection(
-      ".debug_line.oat_patches", SHT_OAT_PATCH));
-  if (!oat_writer->GetMethodDebugInfo().empty()) {
-    if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-      // Generate CFI (stack unwinding information).
-      if (kCFIFormat == dwarf::DW_EH_FRAME_FORMAT) {
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_pcrel, kCFIFormat,
-            eh_frame->GetBuffer(), eh_frame->GetPatchLocations(),
-            eh_frame_hdr->GetBuffer(), eh_frame_hdr->GetPatchLocations());
-        builder->RegisterSection(eh_frame.get());
-        builder->RegisterSection(eh_frame_hdr.get());
-      } else {
-        DCHECK(kCFIFormat == dwarf::DW_DEBUG_FRAME_FORMAT);
-        dwarf::WriteCFISection(
-            compiler_driver_, oat_writer,
-            dwarf::DW_EH_PE_absptr, kCFIFormat,
-            debug_frame->GetBuffer(), debug_frame->GetPatchLocations(),
-            nullptr, nullptr);
-        builder->RegisterSection(debug_frame.get());
-        EncodeOatPatches(*debug_frame->GetPatchLocations(),
-                         debug_frame_oat_patches->GetBuffer());
-        builder->RegisterSection(debug_frame_oat_patches.get());
-      }
+  auto* rodata = builder->GetRoData();
+  auto* text = builder->GetText();
+  auto* bss = builder->GetBss();
+
+  rodata->Start();
+  if (!oat_writer->WriteRodata(rodata)) {
+    return false;
+  }
+  rodata->End();
+
+  text->Start();
+  if (!oat_writer->WriteCode(text)) {
+    return false;
+  }
+  text->End();
+
+  if (oat_writer->GetBssSize() != 0) {
+    bss->Start();
+    bss->SetSize(oat_writer->GetBssSize());
+    bss->End();
+  }
+
+  builder->WriteDynamicSection(elf_file_->GetPath());
+
+  if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
+    const auto& method_infos = oat_writer->GetMethodDebugInfo();
+    if (!method_infos.empty()) {
       // Add methods to .symtab.
       WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate DWARF .debug_* sections.
-      dwarf::WriteDebugSections(
-          compiler_driver_, oat_writer,
-          debug_info->GetBuffer(), debug_info->GetPatchLocations(),
-          debug_abbrev->GetBuffer(),
-          debug_str->GetBuffer(),
-          debug_line->GetBuffer(), debug_line->GetPatchLocations());
-      builder->RegisterSection(debug_info.get());
-      EncodeOatPatches(*debug_info->GetPatchLocations(),
-                       debug_info_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_info_oat_patches.get());
-      builder->RegisterSection(debug_abbrev.get());
-      builder->RegisterSection(debug_str.get());
-      builder->RegisterSection(debug_line.get());
-      EncodeOatPatches(*debug_line->GetPatchLocations(),
-                       debug_line_oat_patches->GetBuffer());
-      builder->RegisterSection(debug_line_oat_patches.get());
+      // Generate CFI (stack unwinding information).
+      dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat);
+      // Write DWARF .debug_* sections.
+      dwarf::WriteDebugSections(builder.get(), method_infos);
     }
   }
 
   // Add relocation section for .text.
-  std::unique_ptr<RawSection> text_oat_patches(new RawSection(
-      ".text.oat_patches", SHT_OAT_PATCH));
   if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
     // Note that ElfWriter::Fixup will be called regardless and therefore
     // we need to include oat_patches for debug sections unconditionally.
-    EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(),
-                     text_oat_patches->GetBuffer());
-    builder->RegisterSection(text_oat_patches.get());
+    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
   }
 
-  return builder->Write(elf_file_);
+  builder->End();
+
+  return builder->Good() && output_stream->Flush();
 }
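
The new flow streams each section through the builder exactly once and fixes up the ELF and program headers at the end, instead of assembling whole sections in memory first. The reserve-then-backpatch trick in miniature (illustrative only, not the builder API):

    #include <cstdint>
    #include <cstdio>

    int main() {
      FILE* f = fopen("demo.bin", "w+b");
      if (f == nullptr) {
        return 1;
      }
      uint32_t total_size = 0;
      fwrite(&total_size, sizeof(total_size), 1, f);  // Reserve header space.
      fputs("section payload", f);                    // Stream the body.
      total_size = static_cast<uint32_t>(ftell(f));   // Layout is known now.
      fseek(f, 0, SEEK_SET);
      fwrite(&total_size, sizeof(total_size), 1, f);  // Backpatch the header.
      fclose(f);
      return 0;
    }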
 
 template <typename ElfTypes>
 static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
   const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
   bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_info.empty()) {
+    return;
+  }
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of method is not marked deduped_, but the rest is.
@@ -264,7 +152,8 @@
     }
   }
 
-  auto* symtab = builder->GetSymtab();
+  strtab->Start();
+  strtab->Write("");  // strtab should start with an empty string.
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
     if (it->deduped_) {
       continue;  // Add symbol only for the first instance.
@@ -277,8 +166,8 @@
     uint32_t low_pc = it->low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, builder->GetText(), low_pc,
-                      true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
@@ -286,12 +175,19 @@
     // requires it to match function symbol.  Just address 0 does not work.
     if (it->compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
-                          0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
   }
+  strtab->End();
+
+  // Symbols are buffered and written after the names (they are smaller, so
+  // buffering them is cheap).  We could also do two passes in this function
+  // to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
 }
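
The ordering above, names streamed first and the fixed-size symbol records buffered and flushed afterwards, works because each Add() only needs the .strtab offset returned by Write(). In miniature (illustrative, not ART code):

    #include <cstdint>
    #include <string>
    #include <vector>

    struct Sym {
      uint32_t name_offset;  // Index into the string table.
      uint32_t value;
    };

    int main() {
      std::string strtab(1, '\0');  // Offset 0 is the empty name.
      std::vector<Sym> symtab;      // Fixed-size records, buffered.
      for (const char* name : {"foo", "bar"}) {
        uint32_t offset = static_cast<uint32_t>(strtab.size());
        strtab += name;
        strtab += '\0';
        symtab.push_back(Sym{offset, 0});  // Name offset is already known.
      }
      // strtab is "\0foo\0bar\0" and symtab can now be flushed in one write.
      return symtab.size() == 2 ? 0 : 1;
    }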
 
 // Explicit instantiations
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index ccf34b8..b413a9e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -21,6 +21,7 @@
 #include "common_compiler_test.h"
+#include "elf_builder.h"
 #include "elf_file.h"
 #include "elf_file_impl.h"
 #include "elf_writer_quick.h"
 #include "oat.h"
 #include "utils.h"
@@ -100,7 +101,7 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfWriterQuick32::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index fd6cd82..a38e1f5 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -64,8 +64,10 @@
   ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
 
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, requested_image_base,
-                                                      /*compile_pic*/false));
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
+                                                      requested_image_base,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false));
   // TODO: compile_pic should be a test argument.
   {
     {
@@ -81,8 +83,15 @@
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
-      OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(),
-                           writer.get(), &timings, &key_value_store);
+      OatWriter oat_writer(class_linker->GetBootClassPath(),
+                           0,
+                           0,
+                           0,
+                           compiler_driver_.get(),
+                           writer.get(),
+                           /*compiling_boot_image*/true,
+                           &timings,
+                           &key_value_store);
       bool success = writer->PrepareImageAddressSpace() &&
           compiler_driver_->WriteElf(GetTestAndroidRoot(),
                                      !kIsTargetBuild,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index af2a4f9..0c85323 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -20,6 +20,7 @@
 
 #include <memory>
 #include <numeric>
+#include <unordered_set>
 #include <vector>
 
 #include "art_field-inl.h"
@@ -72,6 +73,27 @@
 // Separate objects into multiple bins to optimize dirty memory use.
 static constexpr bool kBinObjects = true;
 
+// Return true if an object is already in an image space.
+bool ImageWriter::IsInBootImage(const void* obj) const {
+  if (!compile_app_image_) {
+    DCHECK(boot_image_space_ == nullptr);
+    return false;
+  }
+  const uint8_t* image_begin = boot_image_space_->Begin();
+  // Real image end including ArtMethods and ArtField sections.
+  const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
+  return image_begin <= obj && obj < image_end;
+}
+
+bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  if (!compile_app_image_) {
+    DCHECK(boot_image_space_ == nullptr);
+    return false;
+  }
+  const ImageHeader& image_header = boot_image_space_->GetImageHeader();
+  return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+}
+
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   Class* klass = obj->GetClass();
@@ -85,12 +107,20 @@
 
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
+  // Cache boot image space.
+  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
+    if (space->IsImageSpace()) {
+      CHECK(compile_app_image_);
+      CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
+      boot_image_space_ = space->AsImageSpace();
+    }
+  }
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
   }
-  gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);  // Remove garbage.
 
   // Dex caches must not have their dex fields set in the image. These are memory buffers of mapped
@@ -144,21 +174,21 @@
   Runtime::Current()->GetOatFileManager().RegisterOatFile(
       std::unique_ptr<const OatFile>(oat_file_));
 
-  interpreter_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset();
-  interpreter_to_compiled_code_bridge_offset_ =
-      oat_file_->GetOatHeader().GetInterpreterToCompiledCodeBridgeOffset();
-
-  jni_dlsym_lookup_offset_ = oat_file_->GetOatHeader().GetJniDlsymLookupOffset();
-
-  quick_generic_jni_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickGenericJniTrampolineOffset();
-  quick_imt_conflict_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickImtConflictTrampolineOffset();
-  quick_resolution_trampoline_offset_ =
-      oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset();
-  quick_to_interpreter_bridge_offset_ =
-      oat_file_->GetOatHeader().GetQuickToInterpreterBridgeOffset();
+  const OatHeader& oat_header = oat_file_->GetOatHeader();
+  oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] =
+      oat_header.GetInterpreterToInterpreterBridgeOffset();
+  oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] =
+      oat_header.GetInterpreterToCompiledCodeBridgeOffset();
+  oat_address_offsets_[kOatAddressJNIDlsymLookup] =
+      oat_header.GetJniDlsymLookupOffset();
+  oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] =
+      oat_header.GetQuickGenericJniTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] =
+      oat_header.GetQuickImtConflictTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickResolutionTrampoline] =
+      oat_header.GetQuickResolutionTrampolineOffset();
+  oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
+      oat_header.GetQuickToInterpreterBridgeOffset();
 
   size_t oat_loaded_size = 0;
   size_t oat_data_offset = 0;
@@ -307,7 +337,7 @@
   for (jobject weak_root : class_linker->GetDexCaches()) {
     mirror::DexCache* dex_cache =
         down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-    if (dex_cache == nullptr) {
+    if (dex_cache == nullptr || IsInBootImage(dex_cache)) {
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
@@ -331,6 +361,7 @@
 
 void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
   if (array != nullptr) {
+    DCHECK(!IsInBootImage(array));
     native_object_relocations_.emplace(
         array,
         NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray });
@@ -344,8 +375,8 @@
       auto* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_);
       if (method != nullptr && !method->IsRuntimeMethod()) {
         auto* klass = method->GetDeclaringClass();
-        CHECK(klass == nullptr || IsImageClass(klass)) << PrettyClass(klass)
-            << " should be an image class";
+        CHECK(klass == nullptr || KeepClass(klass))
+            << PrettyClass(klass) << " should be a kept class";
       }
     }
   }
@@ -539,10 +570,66 @@
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-bool ImageWriter::IsImageClass(Class* klass) {
+static bool IsBootClassLoaderClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) {
+  return klass->GetClassLoader() == nullptr;
+}
+
+bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
+}
+
+bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
   if (klass == nullptr) {
     return false;
   }
+  auto found = prune_class_memo_.find(klass);
+  if (found != prune_class_memo_.end()) {
+    // Already computed, return the found value.
+    return found->second;
+  }
+  // Placeholder value to prevent infinite recursion.
+  prune_class_memo_.emplace(klass, false);
+  bool result = IsBootClassLoaderNonImageClass(klass);
+  if (!result) {
+    // Check interfaces since these won't be visited through VisitReferences.
+    mirror::IfTable* if_table = klass->GetIfTable();
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i));
+    }
+  }
+  // Check static fields and their classes.
+  size_t num_static_fields = klass->NumReferenceStaticFields();
+  if (num_static_fields != 0 && klass->IsResolved()) {
+    // GC can presumably happen while we are cross-compiling, so doing the pointer-size
+    // logic here should not cause performance problems.
+    MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset(
+        Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    for (size_t i = 0u; i < num_static_fields; ++i) {
+      mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
+      if (ref != nullptr) {
+        if (ref->IsClass()) {
+          result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass());
+        }
+        result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass());
+      }
+      field_offset = MemberOffset(field_offset.Uint32Value() +
+                                  sizeof(mirror::HeapReference<mirror::Object>));
+    }
+  }
+  result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass());
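+  // Replace the placeholder with the final result for this class.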
+  prune_class_memo_[klass] = result;
+  return result;
+}
+
+bool ImageWriter::KeepClass(Class* klass) {
+  if (klass == nullptr) {
+    return false;
+  }
+  if (compile_app_image_) {
+    // For app images, we need to prune boot loader classes that are not in the boot image since
+    // these may have already been loaded when the app image is loaded.
+    return !ContainsBootClassLoaderNonImageClass(klass);
+  }
   std::string temp;
   return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
 }
@@ -552,21 +639,17 @@
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!image_writer_->IsImageClass(klass)) {
-      std::string temp;
-      non_image_classes_.insert(klass->GetDescriptor(&temp));
+    if (!image_writer_->KeepClass(klass)) {
+      classes_to_prune_.insert(klass);
     }
     return true;
   }
 
-  std::set<std::string> non_image_classes_;
+  std::unordered_set<mirror::Class*> classes_to_prune_;
   ImageWriter* const image_writer_;
 };
 
 void ImageWriter::PruneNonImageClasses() {
-  if (compiler_driver_.GetImageClasses() == nullptr) {
-    return;
-  }
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
@@ -576,8 +659,14 @@
   class_linker->VisitClasses(&visitor);
 
   // Remove the undesired classes from the class roots.
-  for (const std::string& it : visitor.non_image_classes_) {
-    bool result = class_linker->RemoveClass(it.c_str(), nullptr);
+  for (mirror::Class* klass : visitor.classes_to_prune_) {
+    std::string temp;
+    const char* name = klass->GetDescriptor(&temp);
+    VLOG(compiler) << "Pruning class " << name;
+    if (!compile_app_image_) {
+      DCHECK(IsBootClassLoaderClass(klass));
+    }
+    bool result = class_linker->RemoveClass(name, klass->GetClassLoader());
     DCHECK(result);
   }
 
@@ -594,7 +683,7 @@
     }
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
-      if (klass != nullptr && !IsImageClass(klass)) {
+      if (klass != nullptr && !KeepClass(klass)) {
         dex_cache->SetResolvedType(i, nullptr);
       }
     }
@@ -607,7 +696,7 @@
         // Miranda methods may be held live by a class which was not an image class but have a
         // declaring class which is an image class. Set it to the resolution method to be safe and
         // prevent dangling pointers.
-        if (method->IsMiranda() || !IsImageClass(declaring_class)) {
+        if (method->IsMiranda() || !KeepClass(declaring_class)) {
           mirror::DexCache::SetElementPtrSize(resolved_methods,
                                               i,
                                               resolution_method,
@@ -621,7 +710,7 @@
     }
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
       ArtField* field = dex_cache->GetResolvedField(i, target_ptr_size_);
-      if (field != nullptr && !IsImageClass(field->GetDeclaringClass())) {
+      if (field != nullptr && !KeepClass(field->GetDeclaringClass())) {
         dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
       }
     }
@@ -632,6 +721,9 @@
 
   // Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
   class_linker->DropFindArrayClassCache();
+
+  // Clear to save RAM.
+  prune_class_memo_.clear();
 }
 
 void ImageWriter::CheckNonImageClassesRemoved() {
@@ -643,13 +735,13 @@
 
 void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) {
   ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
-  if (obj->IsClass()) {
+  if (obj->IsClass() && !image_writer->IsInBootImage(obj)) {
     Class* klass = obj->AsClass();
-    if (!image_writer->IsImageClass(klass)) {
+    if (!image_writer->KeepClass(klass)) {
       image_writer->DumpImageClasses();
       std::string temp;
-      CHECK(image_writer->IsImageClass(klass)) << klass->GetDescriptor(&temp)
-                                               << " " << PrettyDescriptor(klass);
+      CHECK(image_writer->KeepClass(klass)) << klass->GetDescriptor(&temp)
+                                            << " " << PrettyDescriptor(klass);
     }
   }
 }
@@ -703,25 +795,35 @@
   // ObjectArray, we lock the dex lock twice, first to get the number
   // of dex caches and then lock it again to copy the dex
   // caches. We check that the number of dex caches does not change.
-  size_t dex_cache_count;
+  size_t dex_cache_count = 0;
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    dex_cache_count = class_linker->GetDexCacheCount();
+    // Count number of dex caches not in the boot image.
+    for (jobject weak_root : class_linker->GetDexCaches()) {
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+      dex_cache_count += IsInBootImage(dex_cache) ? 0u : 1u;
+    }
   }
   Handle<ObjectArray<Object>> dex_caches(
-      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(),
-                                              dex_cache_count)));
+      hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count)));
   CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array.";
   {
     ReaderMutexLock mu(self, *class_linker->DexLock());
-    CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount())
-        << "The number of dex caches changed.";
+    size_t non_image_dex_caches = 0;
+    // Re-count the number of non-image dex caches.
+    for (jobject weak_root : class_linker->GetDexCaches()) {
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+      non_image_dex_caches += IsInBootImage(dex_cache) ? 0u : 1u;
+    }
+    CHECK_EQ(dex_cache_count, non_image_dex_caches)
+        << "The number of non-image dex caches changed.";
     size_t i = 0;
     for (jobject weak_root : class_linker->GetDexCaches()) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
-      dex_caches->Set<false>(i, dex_cache);
-      ++i;
+      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root));
+      if (!IsInBootImage(dex_cache)) {
+        dex_caches->Set<false>(i, dex_cache);
+        ++i;
+      }
     }
   }
 
@@ -761,6 +863,10 @@
 
 // For an unvisited object, visit it then all its children found via fields.
 void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
+  if (IsInBootImage(obj)) {
+    // Object is in the image, don't need to fix it up.
+    return;
+  }
   // Use our own visitor routine (instead of GC visitor) to get better locality between
   // an object and its fields
   if (!IsImageBinSlotAssigned(obj)) {
@@ -797,6 +903,7 @@
           CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields
                                                   << " already forwarded";
           size_t& offset = bin_slot_sizes_[kBinArtField];
+          DCHECK(!IsInBootImage(cur_fields));
           native_object_relocations_.emplace(
               cur_fields, NativeObjectRelocation {
                   offset, kNativeObjectRelocationTypeArtFieldArray });
@@ -808,6 +915,7 @@
             auto it2 = native_object_relocations_.find(field);
             CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i
                 << " already assigned " << PrettyField(field) << " static=" << field->IsStatic();
+            DCHECK(!IsInBootImage(field));
             native_object_relocations_.emplace(
                 field, NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField });
             offset += sizeof(ArtField);
@@ -843,6 +951,7 @@
         CHECK(it == native_object_relocations_.end()) << "Method array " << array
             << " already forwarded";
         size_t& offset = bin_slot_sizes_[bin_type];
+        DCHECK(!IsInBootImage(array));
         native_object_relocations_.emplace(array, NativeObjectRelocation { offset,
             any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty :
                 kNativeObjectRelocationTypeArtMethodArrayClean });
@@ -867,6 +976,7 @@
 }
 
 void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) {
+  DCHECK(!IsInBootImage(method));
   auto it = native_object_relocations_.find(method);
   CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned "
       << PrettyMethod(method);
@@ -884,10 +994,13 @@
 void ImageWriter::UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) {
   ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
   DCHECK(writer != nullptr);
-  writer->UnbinObjectsIntoOffset(obj);
+  if (!writer->IsInBootImage(obj)) {
+    writer->UnbinObjectsIntoOffset(obj);
+  }
 }
 
 void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) {
+  DCHECK(!IsInBootImage(obj));
   CHECK(obj != nullptr);
 
   // We know the bin slot, and the total bin sizes for all objects by now,
@@ -925,13 +1038,15 @@
   image_methods_[ImageHeader::kRefsAndArgsSaveMethod] =
       runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs);
 
-  // Add room for fake length prefixed array.
+  // Add room for fake length prefixed array for holding the image methods.
   const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean;
   auto it = native_object_relocations_.find(&image_method_array_);
   CHECK(it == native_object_relocations_.end());
   size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)];
-  native_object_relocations_.emplace(&image_method_array_,
-                                     NativeObjectRelocation { offset, image_method_type });
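+  // When compiling an app image, the runtime methods live in the boot image, so the fake
+  // array is only needed for boot images.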
+  if (!compile_app_image_) {
+    native_object_relocations_.emplace(&image_method_array_,
+                                       NativeObjectRelocation { offset, image_method_type });
+  }
   size_t method_alignment = ArtMethod::Alignment(target_ptr_size_);
   const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize(
       0, ArtMethod::Size(target_ptr_size_), method_alignment);
@@ -940,7 +1055,10 @@
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
     CHECK(m->IsRuntimeMethod());
-    AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+    DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image";
+    if (!IsInBootImage(m)) {
+      AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean);
+    }
   }
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
@@ -1090,6 +1208,7 @@
     NativeObjectRelocation& relocation = pair.second;
     auto* dest = image_->Begin() + relocation.offset;
     DCHECK_GE(dest, image_->Begin() + image_end_);
+    DCHECK(!IsInBootImage(pair.first));
     switch (relocation.type) {
       case kNativeObjectRelocationTypeArtField: {
         memcpy(dest, pair.first, sizeof(ArtField));
@@ -1126,16 +1245,18 @@
   auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   const ImageSection& methods_section = image_header->GetMethodsSection();
   for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) {
-    auto* m = image_methods_[i];
-    CHECK(m != nullptr);
-    auto it = native_object_relocations_.find(m);
-    CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(m);
-    NativeObjectRelocation& relocation = it->second;
-    CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
-        << methods_section;
-    CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
-    auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
-    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest);
+    ArtMethod* method = image_methods_[i];
+    CHECK(method != nullptr);
+    if (!IsInBootImage(method)) {
+      auto it = native_object_relocations_.find(method);
+      CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method);
+      NativeObjectRelocation& relocation = it->second;
+      CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in "
+          << methods_section;
+      CHECK(relocation.IsArtMethodRelocation()) << relocation.type;
+      method = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset);
+    }
+    image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
   // Write the intern table into the image.
   const ImageSection& intern_table_section = image_header->GetImageSection(
@@ -1183,8 +1304,8 @@
   dst->SetClass(GetImageAddress(arr->GetClass()));
   auto* dest_array = down_cast<mirror::PointerArray*>(dst);
   for (size_t i = 0, count = num_elements; i < count; ++i) {
-    auto* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
-    if (elem != nullptr) {
+    void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
+    if (elem != nullptr && !IsInBootImage(elem)) {
       auto it = native_object_relocations_.find(elem);
       if (UNLIKELY(it == native_object_relocations_.end())) {
         if (it->second.IsArtMethodRelocation()) {
@@ -1209,6 +1330,9 @@
 }
 
 void ImageWriter::CopyAndFixupObject(Object* obj) {
+  if (IsInBootImage(obj)) {
+    return;
+  }
   size_t offset = GetImageOffset(obj);
   auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
   DCHECK_LT(offset, image_end_);
@@ -1282,18 +1406,19 @@
 
 uintptr_t ImageWriter::NativeOffsetInImage(void* obj) {
   DCHECK(obj != nullptr);
+  DCHECK(!IsInBootImage(obj));
   auto it = native_object_relocations_.find(obj);
-  CHECK(it != native_object_relocations_.end()) << obj;
+  CHECK(it != native_object_relocations_.end()) << obj << " spaces "
+      << Runtime::Current()->GetHeap()->DumpSpaces();
   const NativeObjectRelocation& relocation = it->second;
   return relocation.offset;
 }
 
 template <typename T>
 T* ImageWriter::NativeLocationInImage(T* obj) {
-  if (obj == nullptr) {
-    return nullptr;
-  }
-  return reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
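+  // Pointers into the boot image stay as-is; everything else is relocated into the new image.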
+  return (obj == nullptr || IsInBootImage(obj))
+      ? obj
+      : reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj));
 }
 
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
@@ -1306,18 +1431,22 @@
   // Update dex cache strings.
   copy->SetDexCacheStrings(NativeLocationInImage(orig->GetDexCacheStrings()));
   // Fix up embedded tables.
-  if (orig->ShouldHaveEmbeddedImtAndVTable()) {
-    for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedVTableEntryUnchecked(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
-    }
-    for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
-      auto it = native_object_relocations_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_));
-      CHECK(it != native_object_relocations_.end()) << PrettyClass(orig);
-      copy->SetEmbeddedImTableEntry(
-          i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_);
+  if (!orig->IsTemp()) {
+    // TODO: Why do we have temp classes in some cases?
+    if (orig->ShouldHaveEmbeddedImtAndVTable()) {
+      for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) {
+        ArtMethod* orig_method = orig->GetEmbeddedVTableEntry(i, target_ptr_size_);
+        copy->SetEmbeddedVTableEntryUnchecked(
+            i,
+            NativeLocationInImage(orig_method),
+            target_ptr_size_);
+      }
+      for (size_t i = 0; i < mirror::Class::kImtSize; ++i) {
+        copy->SetEmbeddedImTableEntry(
+            i,
+            NativeLocationInImage(orig->GetEmbeddedImTableEntry(i, target_ptr_size_)),
+            target_ptr_size_);
+      }
     }
   }
   FixupClassVisitor visitor(this, copy);
@@ -1419,7 +1548,7 @@
         reinterpret_cast<ArtMethod**>(image_->Begin() + copy_methods_offset);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) {
       ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_);
-      ArtMethod* copy = NativeLocationInImage(orig);
+      ArtMethod* copy = IsInBootImage(orig) ? orig : NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
@@ -1432,15 +1561,51 @@
     ArtField** copy_fields = reinterpret_cast<ArtField**>(image_->Begin() + copy_fields_offset);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
       ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      ArtField* copy = NativeLocationInImage(orig);
+      ArtField* copy = IsInBootImage(orig) ? orig : NativeLocationInImage(orig);
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
 }
 
+const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
+  DCHECK_LT(type, kOatAddressCount);
+  // If we are compiling an app image, we need to use the stubs of the boot image.
+  if (compile_app_image_) {
+    // Use the current image pointers.
+    gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+    DCHECK(image_space != nullptr);
+    const OatFile* oat_file = image_space->GetOatFile();
+    CHECK(oat_file != nullptr);
+    const OatHeader& header = oat_file->GetOatHeader();
+    switch (type) {
+      // TODO: We could maybe clean this up if we stored them in an array in the oat header.
+      case kOatAddressQuickGenericJNITrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickGenericJniTrampoline());
+      case kOatAddressInterpreterToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToInterpreterBridge());
+      case kOatAddressInterpreterToCompiledCodeBridge:
+        return static_cast<const uint8_t*>(header.GetInterpreterToCompiledCodeBridge());
+      case kOatAddressJNIDlsymLookup:
+        return static_cast<const uint8_t*>(header.GetJniDlsymLookup());
+      case kOatAddressQuickIMTConflictTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickImtConflictTrampoline());
+      case kOatAddressQuickResolutionTrampoline:
+        return static_cast<const uint8_t*>(header.GetQuickResolutionTrampoline());
+      case kOatAddressQuickToInterpreterBridge:
+        return static_cast<const uint8_t*>(header.GetQuickToInterpreterBridge());
+      default:
+        UNREACHABLE();
+    }
+  }
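+  // Compiling a boot image: use the stub offsets cached from the oat header.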
+  return GetOatAddressForOffset(oat_address_offsets_[type]);
+}
+
 const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) {
-  DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() &&
-         !method->IsImtUnimplementedMethod() && !method->IsAbstract()) << PrettyMethod(method);
+  DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsImtConflictMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method);
+  DCHECK(!method->IsAbstract()) << PrettyMethod(method);
+  DCHECK(!IsInBootImage(method)) << PrettyMethod(method);
 
   // Use original code if it exists. Otherwise, set the code pointer to the resolution
   // trampoline.
@@ -1448,27 +1613,26 @@
   // Quick entrypoint:
   uint32_t quick_oat_code_offset = PointerToLowMemUInt32(
       method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_));
-  const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset);
+  const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset);
   *quick_is_interpreted = false;
   if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() ||
       method->GetDeclaringClass()->IsInitialized())) {
     // We have code for a non-static or initialized method, just use the code.
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else if (quick_code == nullptr && method->IsNative() &&
       (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) {
     // Non-static or initialized native method missing compiled code, use generic JNI version.
-    quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_);
-    DCHECK_GE(quick_code, oat_data_begin_);
+    quick_code = GetOatAddress(kOatAddressQuickGenericJNITrampoline);
   } else if (quick_code == nullptr && !method->IsNative()) {
     // We don't have code at all for a non-native method, use the interpreter.
-    quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_);
+    quick_code = GetOatAddress(kOatAddressQuickToInterpreterBridge);
     *quick_is_interpreted = true;
-    DCHECK_GE(quick_code, oat_data_begin_);
   } else {
     CHECK(!method->GetDeclaringClass()->IsInitialized());
     // We have code for a static method, but need to go through the resolution stub for class
     // initialization.
-    quick_code = GetOatAddress(quick_resolution_trampoline_offset_);
+    quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline);
+  }
+  if (!IsInBootOatFile(quick_code)) {
     DCHECK_GE(quick_code, oat_data_begin_);
   }
   return quick_code;
@@ -1479,16 +1643,16 @@
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(method == runtime->GetResolutionMethod())) {
-    return GetOatAddress(quick_resolution_trampoline_offset_);
+    return GetOatAddress(kOatAddressQuickResolutionTrampoline);
   } else if (UNLIKELY(method == runtime->GetImtConflictMethod() ||
                       method == runtime->GetImtUnimplementedMethod())) {
-    return GetOatAddress(quick_imt_conflict_trampoline_offset_);
+    return GetOatAddress(kOatAddressQuickIMTConflictTrampoline);
   } else {
     // We assume all methods have code. If they don't currently then we set them to use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(method->IsAbstract())) {
-      return GetOatAddress(quick_to_interpreter_bridge_offset_);
+      return GetOatAddress(kOatAddressQuickToInterpreterBridge);
     } else {
       bool quick_is_interpreted;
       return GetQuickCode(method, &quick_is_interpreted);
@@ -1513,11 +1677,11 @@
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
     copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_);
+        GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
   } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
                       orig == runtime->GetImtUnimplementedMethod())) {
     copy->SetEntryPointFromQuickCompiledCodePtrSize(
-        GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_);
+        GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_);
   } else if (UNLIKELY(orig->IsRuntimeMethod())) {
     bool found_one = false;
     for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
@@ -1535,7 +1699,7 @@
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(orig->IsAbstract())) {
       copy->SetEntryPointFromQuickCompiledCodePtrSize(
-          GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
+          GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
       const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
@@ -1546,7 +1710,7 @@
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
         copy->SetEntryPointFromJniPtrSize(
-            GetOatAddress(jni_dlsym_lookup_offset_), target_ptr_size_);
+            GetOatAddress(kOatAddressJNIDlsymLookup), target_ptr_size_);
       }
     }
   }
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 7a2febc..120de97 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -40,27 +40,42 @@
 #include "utils.h"
 
 namespace art {
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
 
 static constexpr int kInvalidImageFd = -1;
 
 // Write a Space built during compilation for use during execution.
 class ImageWriter FINAL {
  public:
-  ImageWriter(const CompilerDriver& compiler_driver, uintptr_t image_begin,
-              bool compile_pic)
-      : compiler_driver_(compiler_driver), image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
-        image_end_(0), image_objects_offset_begin_(0), image_roots_address_(0), oat_file_(nullptr),
-        oat_data_begin_(nullptr), interpreter_to_interpreter_bridge_offset_(0),
-        interpreter_to_compiled_code_bridge_offset_(0), jni_dlsym_lookup_offset_(0),
-        quick_generic_jni_trampoline_offset_(0),
-        quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
-        quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
+  ImageWriter(const CompilerDriver& compiler_driver,
+              uintptr_t image_begin,
+              bool compile_pic,
+              bool compile_app_image)
+      : compiler_driver_(compiler_driver),
+        image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+        image_end_(0),
+        image_objects_offset_begin_(0),
+        image_roots_address_(0),
+        oat_file_(nullptr),
+        oat_data_begin_(nullptr),
+        compile_pic_(compile_pic),
+        compile_app_image_(compile_app_image),
+        boot_image_space_(nullptr),
         target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
-        bin_slot_sizes_(), bin_slot_offsets_(), bin_slot_count_(),
-        intern_table_bytes_(0u), image_method_array_(ImageHeader::kImageMethodsCount),
-        dirty_methods_(0u), clean_methods_(0u) {
+        bin_slot_sizes_(),
+        bin_slot_offsets_(),
+        bin_slot_count_(),
+        intern_table_bytes_(0u),
+        image_method_array_(ImageHeader::kImageMethodsCount),
+        dirty_methods_(0u),
+        clean_methods_(0u) {
     CHECK_NE(image_begin, 0U);
-    std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr);
+    std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+    std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
   }
 
   ~ImageWriter() {
@@ -74,8 +89,9 @@
 
   template <typename T>
   T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) {
-    return object == nullptr ? nullptr :
-        reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
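+    // Objects in the boot image keep their existing address.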
+    return (object == nullptr || IsInBootImage(object))
+        ? object
+        : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object));
   }
 
   ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_);
@@ -150,6 +166,19 @@
   };
   friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type);
 
+  enum OatAddress {
+    kOatAddressInterpreterToInterpreterBridge,
+    kOatAddressInterpreterToCompiledCodeBridge,
+    kOatAddressJNIDlsymLookup,
+    kOatAddressQuickGenericJNITrampoline,
+    kOatAddressQuickIMTConflictTrampoline,
+    kOatAddressQuickResolutionTrampoline,
+    kOatAddressQuickToInterpreterBridge,
+    // Number of elements in the enum.
+    kOatAddressCount,
+  };
+  friend std::ostream& operator<<(std::ostream& stream, const OatAddress& oat_address);
+
   static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1);
   // uint32 = typeof(lockword_)
   // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK
@@ -215,7 +244,10 @@
     return reinterpret_cast<mirror::Object*>(dst);
   }
 
-  const uint8_t* GetOatAddress(uint32_t offset) const {
+  // Returns the address in the boot image if we are compiling the app image.
+  const uint8_t* GetOatAddress(OatAddress type) const;
+
+  const uint8_t* GetOatAddressForOffset(uint32_t offset) const {
     // With Quick, code is within the OatFile, as they are all in one
     // .o ELF object.
     DCHECK_LE(offset, oat_file_->Size());
@@ -224,7 +256,7 @@
   }
 
-  // Returns true if the class was in the original requested image classes list.
-  bool IsImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  // Returns true if the class should be kept in the image being written.
+  bool KeepClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Debug aid that lists the requested image classes.
   void DumpImageClasses();
@@ -299,6 +331,11 @@
   void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+
+  bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
   uintptr_t NativeOffsetInImage(void* obj);
@@ -306,6 +343,13 @@
   template <typename T>
   T* NativeLocationInImage(T* obj);
 
+  // Return true if obj is inside the boot image space. This may only return true if we are
+  // compiling an app image.
+  bool IsInBootImage(const void* obj) const;
+
+  // Return true if ptr is within the boot oat file.
+  bool IsInBootOatFile(const void* ptr) const;
+
   const CompilerDriver& compiler_driver_;
 
   // Beginning target image address for the output image.
@@ -344,14 +388,14 @@
   std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_;
 
   // Offsets from oat_data_begin_ to the stubs.
-  uint32_t interpreter_to_interpreter_bridge_offset_;
-  uint32_t interpreter_to_compiled_code_bridge_offset_;
-  uint32_t jni_dlsym_lookup_offset_;
-  uint32_t quick_generic_jni_trampoline_offset_;
-  uint32_t quick_imt_conflict_trampoline_offset_;
-  uint32_t quick_resolution_trampoline_offset_;
-  uint32_t quick_to_interpreter_bridge_offset_;
+  uint32_t oat_address_offsets_[kOatAddressCount];
+
+  // Boolean flags.
   const bool compile_pic_;
+  const bool compile_app_image_;
+
+  // Boot image space for fast lookups.
+  gc::space::ImageSpace* boot_image_space_;
 
   // Size of pointers on the target architecture.
   size_t target_ptr_size_;
@@ -388,6 +432,10 @@
   uint64_t dirty_methods_;
   uint64_t clean_methods_;
 
+  // Prune class memoization table.
+  std::unordered_map<mirror::Class*, bool> prune_class_memo_;
+
+  friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
   friend class FixupVisitor;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index d520208..2125c9a 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -177,7 +177,8 @@
   }
 
   // Don't compile the method if we are supposed to be deoptimized.
-  if (runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) {
+  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
     return false;
   }
 
@@ -189,13 +190,14 @@
   }
 
   // Do the compilation.
-  CompiledMethod* compiled_method = nullptr;
+  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
+  bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
-    compiled_method = compiler_driver_->CompileArtMethod(self, method_to_compile);
+    success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
   // Trim maps to reduce memory usage.
@@ -205,105 +207,14 @@
     runtime->GetArenaPool()->TrimMaps();
   }
 
-  // Check if we failed compiling.
-  if (compiled_method == nullptr) {
-    return false;
-  }
-
   total_time_ += NanoTime() - start_time;
-  bool result = false;
-  const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method);
-
-  if (code != nullptr) {
-    // Already have some compiled code, just use this instead of linking.
-    // TODO: Fix recompilation.
-    method->SetEntryPointFromQuickCompiledCode(code);
-    result = true;
-  } else {
-    TimingLogger::ScopedTiming t2("LinkCode", &logger);
-    if (AddToCodeCache(method, compiled_method)) {
-      result = true;
-    }
-  }
-
-  // Remove the compiled method to save memory.
-  compiler_driver_->RemoveCompiledMethod(
-      MethodReference(h_class->GetDexCache()->GetDexFile(), method->GetDexMethodIndex()));
   runtime->GetJit()->AddTimingLogger(logger);
-  return result;
+  return success;
 }
 
 CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const {
   return callbacks_.get();
 }
 
-bool JitCompiler::AddToCodeCache(ArtMethod* method,
-                                 const CompiledMethod* compiled_method) {
-  Runtime* runtime = Runtime::Current();
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
-  auto const quick_code = compiled_method->GetQuickCode();
-  if (quick_code.empty()) {
-    return false;
-  }
-  const auto code_size = quick_code.size();
-  Thread* const self = Thread::Current();
-  auto const mapping_table = compiled_method->GetMappingTable();
-  auto const vmap_table = compiled_method->GetVmapTable();
-  auto const gc_map = compiled_method->GetGcMap();
-  uint8_t* mapping_table_ptr = nullptr;
-  uint8_t* vmap_table_ptr = nullptr;
-  uint8_t* gc_map_ptr = nullptr;
-
-  if (!mapping_table.empty()) {
-    // Write out pre-header stuff.
-    mapping_table_ptr = code_cache->AddDataArray(
-        self, mapping_table.data(), mapping_table.data() + mapping_table.size());
-    if (mapping_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (!vmap_table.empty()) {
-    vmap_table_ptr = code_cache->AddDataArray(
-        self, vmap_table.data(), vmap_table.data() + vmap_table.size());
-    if (vmap_table_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  if (!gc_map.empty()) {
-    gc_map_ptr = code_cache->AddDataArray(
-        self, gc_map.data(), gc_map.data() + gc_map.size());
-    if (gc_map_ptr == nullptr) {
-      return false;  // Out of data cache.
-    }
-  }
-
-  uint8_t* const code = code_cache->CommitCode(self,
-                                               method,
-                                               mapping_table_ptr,
-                                               vmap_table_ptr,
-                                               gc_map_ptr,
-                                               compiled_method->GetFrameSizeInBytes(),
-                                               compiled_method->GetCoreSpillMask(),
-                                               compiled_method->GetFpSpillMask(),
-                                               compiled_method->GetQuickCode().data(),
-                                               compiled_method->GetQuickCode().size());
-
-  if (code == nullptr) {
-    return false;
-  }
-
-  const size_t thumb_offset = compiled_method->CodeDelta();
-  const uint32_t code_offset = sizeof(OatQuickMethodHeader) + thumb_offset;
-  VLOG(jit)
-      << "JIT added "
-      << PrettyMethod(method) << "@" << method
-      << " ccache_size=" << PrettySize(code_cache->CodeCacheSize()) << ": "
-      << reinterpret_cast<void*>(code + code_offset)
-      << "," << reinterpret_cast<void*>(code + code_offset + code_size);
-  return true;
-}
-
 }  // namespace jit
 }  // namespace art
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index ea3cb66..16f641a 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -16,6 +16,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiled_method.h"
@@ -37,6 +38,16 @@
 
 namespace art {
 
+NO_RETURN static void Usage(const char* fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  std::string error;
+  StringAppendV(&error, fmt, ap);
+  va_end(ap);
+  LOG(FATAL) << error;
+  UNREACHABLE();
+}
+
 class OatTest : public CommonCompilerTest {
  protected:
   static const bool kCompile = false;  // DISABLED_ due to the time to compile libcore
@@ -71,6 +82,67 @@
       CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size));
     }
   }
+
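+  // Create the compiler driver and its supporting objects for the given instruction set and
+  // compiler options.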
+  void SetupCompiler(Compiler::Kind compiler_kind,
+                     InstructionSet insn_set,
+                     const std::vector<std::string>& compiler_options,
+                     /*out*/std::string* error_msg) {
+    ASSERT_TRUE(error_msg != nullptr);
+    insn_features_.reset(InstructionSetFeatures::FromVariant(insn_set, "default", error_msg));
+    ASSERT_TRUE(insn_features_ != nullptr) << error_msg;
+    compiler_options_.reset(new CompilerOptions);
+    for (const std::string& option : compiler_options) {
+      compiler_options_->ParseCompilerOption(option, Usage);
+    }
+    verification_results_.reset(new VerificationResults(compiler_options_.get()));
+    method_inliner_map_.reset(new DexFileToMethodInlinerMap);
+    callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(),
+                                                method_inliner_map_.get(),
+                                                CompilerCallbacks::CallbackMode::kCompileApp));
+    Runtime::Current()->SetCompilerCallbacks(callbacks_.get());
+    timer_.reset(new CumulativeLogger("Compilation times"));
+    compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
+                                              verification_results_.get(),
+                                              method_inliner_map_.get(),
+                                              compiler_kind,
+                                              insn_set,
+                                              insn_features_.get(),
+                                              false,
+                                              nullptr,
+                                              nullptr,
+                                              nullptr,
+                                              2,
+                                              true,
+                                              true,
+                                              "",
+                                              false,
+                                              timer_.get(),
+                                              -1,
+                                              ""));
+  }
+
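+  // Write an oat file for the given dex files; the image-related arguments use placeholder
+  // values since no image is being written.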
+  bool WriteElf(File* file,
+                const std::vector<const DexFile*>& dex_files,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(dex_files,
+                         42U,
+                         4096U,
+                         0,
+                         compiler_driver_.get(),
+                         nullptr,
+                         /*compiling_boot_image*/false,
+                         &timings,
+                         &key_value_store);
+    return compiler_driver_->WriteElf(GetTestAndroidRoot(),
+                                      !kIsTargetBuild,
+                                      dex_files,
+                                      &oat_writer,
+                                      file);
+  }
+
+  std::unique_ptr<const InstructionSetFeatures> insn_features_;
+  std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
 TEST_F(OatTest, WriteRead) {
@@ -80,21 +152,9 @@
   // TODO: make selectable.
   Compiler::Kind compiler_kind = Compiler::kQuick;
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
-
   std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> insn_features(
-      InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
-  ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
-  compiler_options_.reset(new CompilerOptions);
-  verification_results_.reset(new VerificationResults(compiler_options_.get()));
-  method_inliner_map_.reset(new DexFileToMethodInlinerMap);
-  timer_.reset(new CumulativeLogger("Compilation times"));
-  compiler_driver_.reset(new CompilerDriver(compiler_options_.get(),
-                                            verification_results_.get(),
-                                            method_inliner_map_.get(),
-                                            compiler_kind, insn_set,
-                                            insn_features.get(), false, nullptr, nullptr, nullptr,
-                                            2, true, true, "", false, timer_.get(), -1, ""));
+  SetupCompiler(compiler_kind, insn_set, std::vector<std::string>(), /*out*/ &error_msg);
+
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
@@ -105,19 +165,7 @@
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
-  OatWriter oat_writer(class_linker->GetBootClassPath(),
-                       42U,
-                       4096U,
-                       0,
-                       compiler_driver_.get(),
-                       nullptr,
-                       &timings,
-                       &key_value_store);
-  bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                            !kIsTargetBuild,
-                                            class_linker->GetBootClassPath(),
-                                            &oat_writer,
-                                            tmp.GetFile());
+  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store);
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
@@ -212,4 +260,53 @@
     ASSERT_FALSE(oat_header->IsValid());
 }
 
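+// Check that writing an oat file with an empty .text section (no compiled code) succeeds.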
+TEST_F(OatTest, EmptyTextSection) {
+  TimingLogger timings("OatTest::EmptyTextSection", false, false);
+
+  // TODO: make selectable.
+  Compiler::Kind compiler_kind = Compiler::kQuick;
+  InstructionSet insn_set = kRuntimeISA;
+  if (insn_set == kArm) insn_set = kThumb2;
+  std::string error_msg;
+  std::vector<std::string> compiler_options;
+  compiler_options.push_back("--compiler-filter=verify-at-runtime");
+  SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg);
+
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    class_loader = LoadDex("Main");
+  }
+  ASSERT_TRUE(class_loader != nullptr);
+  std::vector<const DexFile*> dex_files = GetDexFiles(class_loader);
+  ASSERT_TRUE(!dex_files.empty());
+
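+  // Register the dex files with the class linker before compiling them.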
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  for (const DexFile* dex_file : dex_files) {
+    ScopedObjectAccess soa(Thread::Current());
+    class_linker->RegisterDexFile(
+        *dex_file,
+        class_linker->GetOrCreateAllocatorForClassLoader(
+            soa.Decode<mirror::ClassLoader*>(class_loader)));
+  }
+  compiler_driver_->SetDexFilesForOatFile(dex_files);
+  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+
+  ScratchFile tmp;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
+                                                  tmp.GetFilename(),
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  nullptr,
+                                                  &error_msg));
+  ASSERT_TRUE(oat_file != nullptr);
+  EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength()));
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index c7b8884..3f2271e 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -65,10 +65,12 @@
                      int32_t image_patch_delta,
                      const CompilerDriver* compiler,
                      ImageWriter* image_writer,
+                     bool compiling_boot_image,
                      TimingLogger* timings,
                      SafeMap<std::string, std::string>* key_value_store)
   : compiler_driver_(compiler),
     image_writer_(image_writer),
+    compiling_boot_image_(compiling_boot_image),
     dex_files_(&dex_files),
     size_(0u),
     bss_size_(0u),
@@ -113,7 +115,9 @@
     size_oat_lookup_table_(0),
     method_offset_map_() {
   CHECK(key_value_store != nullptr);
-
+  if (compiling_boot_image) {
+    CHECK(image_writer != nullptr);
+  }
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
   const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
   relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
@@ -154,7 +158,7 @@
   }
   size_ = offset;
 
-  if (!HasImage()) {
+  if (!HasBootImage()) {
     // Allocate space for app dex cache arrays in the .bss section.
     size_t bss_start = RoundUp(size_, kPageSize);
     size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
@@ -167,9 +171,10 @@
   }
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
-  CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr);
-  CHECK_EQ(compiler->IsImage(),
-           key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+  if (compiling_boot_image_) {
+    CHECK_EQ(image_writer_ != nullptr,
+             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+  }
   CHECK_ALIGNED(image_patch_delta_, kPageSize);
 }
 
@@ -672,7 +677,7 @@
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
     patched_code_.reserve(16 * KB);
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       // If we're creating the image, the address space must be ready so that we can apply patches.
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
     }
@@ -855,7 +860,7 @@
   }
 
   uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>(
               patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
       const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_;
@@ -868,7 +873,7 @@
 
   void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       object = writer_->image_writer_->GetImageAddress(object);
     } else {
       // NOTE: We're using linker patches for app->boot references when the image can
@@ -888,7 +893,7 @@
 
   void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
       SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       method = writer_->image_writer_->GetImageMethodAddress(method);
     } else if (kIsDebugBuild) {
       // NOTE: We're using linker patches for app->boot references when the image can
@@ -911,7 +916,7 @@
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       SHARED_REQUIRES(Locks::mutator_lock_) {
     uint32_t address = target_offset;
-    if (writer_->HasImage()) {
+    if (writer_->HasBootImage()) {
       address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
                                       writer_->oat_data_offset_ + target_offset);
     }
@@ -1123,7 +1128,7 @@
   offset = RoundUp(offset, kPageSize);
   oat_header_->SetExecutableOffset(offset);
   size_executable_offset_alignment_ = offset - old_offset;
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     CHECK_EQ(image_patch_delta_, 0);
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
@@ -1164,7 +1169,7 @@
     } while (false)
 
   VISIT(InitCodeMethodVisitor);
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     VISIT(InitImageMethodVisitor);
   }
 
@@ -1408,7 +1413,7 @@
 }
 
 size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  if (compiler_driver_->IsImage()) {
+  if (compiler_driver_->IsBootImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
     #define DO_TRAMPOLINE(field) \
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index f2fe048..7027434 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -93,6 +93,7 @@
             int32_t image_patch_delta,
             const CompilerDriver* compiler,
             ImageWriter* image_writer,
+            bool compiling_boot_image,
             TimingLogger* timings,
             SafeMap<std::string, std::string>* key_value_store);
 
@@ -103,6 +104,10 @@
     return image_writer_ != nullptr;
   }
 
+  bool HasBootImage() const {
+    return compiling_boot_image_;
+  }
+
   const OatHeader& GetOatHeader() const {
     return *oat_header_;
   }
@@ -279,6 +284,7 @@
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
+  const bool compiling_boot_image_;
 
   // note OatFile does not take ownership of the DexFiles
   const std::vector<const DexFile*>* dex_files_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index bcc3240..cca0baf 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1169,8 +1169,10 @@
   // Return the range resulting from induction variable analysis of "instruction" when the value
   // is used from "context", for example, an index used from a bounds-check inside a loop body.
   ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
-    InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test);
     if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
         v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
       DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ed193c7..167c35d 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -359,18 +359,10 @@
           // need a strategy for splitting exceptional edges. We split the block
           // after the move-exception (if present) and mark the first part not
           // throwing. The normal-flow edge between them will be split later.
-          HInstruction* first_insn = block->GetFirstInstruction();
-          if (first_insn->IsLoadException()) {
-            // Catch block starts with a LoadException. Split the block after
-            // the StoreLocal and ClearException which must come after the load.
-            DCHECK(first_insn->GetNext()->IsStoreLocal());
-            DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
-            throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
-          } else {
-            // Catch block does not load the exception. Split at the beginning
-            // to create an empty catch block.
-            throwing_block = block->SplitBefore(first_insn);
-          }
+          throwing_block = block->SplitCatchBlockAfterMoveException();
+          // Move-exception does not throw and the block has throwing instructions
+          // so it must have been possible to split it.
+          DCHECK(throwing_block != nullptr);
         }
 
         try_block_info.Put(throwing_block->GetBlockId(),
@@ -1006,7 +998,9 @@
     return false;
   }
 
-  if (invoke->IsInvokeStaticOrDirect()) {
+  if (invoke->IsInvokeStaticOrDirect() &&
+      HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+          invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
     invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
     (*argument_index)++;
   }
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a1bb5e0..ce92470 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -42,7 +42,7 @@
 
 #include "compiled_method.h"
 #include "dex/verified_method.h"
-#include "driver/dex_compilation_unit.h"
+#include "driver/compiler_driver.h"
 #include "gc_map_builder.h"
 #include "graph_visualizer.h"
 #include "intrinsics.h"
@@ -787,9 +787,10 @@
 }
 
 void CodeGenerator::BuildNativeGCMap(
-    ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+    ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
   const std::vector<uint8_t>& gc_map_raw =
-      dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
+      compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
+          ->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
 
   uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
@@ -911,19 +912,22 @@
   vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
 }
 
-void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
-  uint32_t size = stack_map_stream_.PrepareForFillIn();
-  data->resize(size);
-  MemoryRegion region(data->data(), size);
+size_t CodeGenerator::ComputeStackMapsSize() {
+  return stack_map_stream_.PrepareForFillIn();
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region) {
   stack_map_stream_.FillIn(region);
 }
 
 void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
                                           uintptr_t native_pc_begin,
                                           uintptr_t native_pc_end) {
-  if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
-    src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
-                                    static_cast<int32_t>(dex_pc)}));
+  if (compiler_options_.GetGenerateDebugInfo() &&
+      dex_pc != kNoDexPc &&
+      native_pc_begin != native_pc_end) {
+    src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
+                                   static_cast<int32_t>(dex_pc)}));
   }
 }
 
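Stack map emission is now two-phase: ComputeStackMapsSize() runs PrepareForFillIn() to learn the encoded size, and BuildStackMaps(MemoryRegion) fills caller-provided storage, so callers no longer pay for an intermediate ArenaVector<uint8_t>. A minimal sketch of a caller, assuming a codegen pointer and a plain std::vector as the destination (both hypothetical here, not taken from this change):

  size_t size = codegen->ComputeStackMapsSize();      // PrepareForFillIn() under the hood.
  std::vector<uint8_t> buffer(size);                  // Storage is owned by the caller now.
  MemoryRegion region(buffer.data(), buffer.size());
  codegen->BuildStackMaps(region);                    // FillIn() writes straight into it.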
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47b6f30..a92014d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/bit_field.h"
+#include "compiled_method.h"
 #include "driver/compiler_options.h"
 #include "globals.h"
 #include "graph_visualizer.h"
@@ -51,13 +52,9 @@
 
 class Assembler;
 class CodeGenerator;
-class DexCompilationUnit;
+class CompilerDriver;
 class LinkerPatch;
 class ParallelMoveResolver;
-class SrcMapElem;
-template <class Alloc>
-class SrcMap;
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
 
 class CodeAllocator {
  public:
@@ -284,13 +281,12 @@
     slow_paths_.push_back(slow_path);
   }
 
-  void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
-
   void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
   void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
   void BuildNativeGCMap(
-      ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
-  void BuildStackMaps(ArenaVector<uint8_t>* vector);
+      ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
+  void BuildStackMaps(MemoryRegion region);
+  size_t ComputeStackMapsSize();
 
   bool IsBaseline() const {
     return is_baseline_;
@@ -446,6 +442,10 @@
   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
 
+  const ArenaVector<SrcMapElem>& GetSrcMappingTable() const {
+    return src_map_;
+  }
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
@@ -488,7 +488,7 @@
         stats_(stats),
         graph_(graph),
         compiler_options_(compiler_options),
-        src_map_(nullptr),
+        src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         current_block_index_(0),
         is_leaf_(true),
@@ -602,7 +602,7 @@
   const CompilerOptions& compiler_options_;
 
   // Native to dex_pc map used for native debugging/profiling tools.
-  DefaultSrcMap* src_map_;
+  ArenaVector<SrcMapElem> src_map_;
   ArenaVector<SlowPathCode*> slow_paths_;
 
   // The current block index in `block_order_` of the block
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3dc3b7f..6d05293 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1300,20 +1300,29 @@
       DCHECK_EQ(cond_value, 0);
     }
   } else {
-    if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
-      // Condition has been materialized, compare the output to 0
+    // Can we optimize the jump if we know that the next block is the true case?
+    HCondition* condition = cond->AsCondition();
+    bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition);
+    if (condition == nullptr || condition->NeedsMaterialization()) {
+      // Condition has been materialized, compare the output to 0.
       DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
+      if (can_jump_to_false) {
+        __ CompareAndBranchIfZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
+                                  false_target);
+        return;
+      }
       __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
                                    true_target);
     } else {
       // Condition has not been materialized, use its inputs as the
       // comparison and its condition as the branch condition.
-      Primitive::Type type =
-          cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
+      Primitive::Type type = (condition != nullptr)
+          ? cond->InputAt(0)->GetType()
+          : Primitive::kPrimInt;
       // Is this a long or FP comparison that has been folded into the HCondition?
       if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
         // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
+        GenerateCompareTestAndBranch(instruction->AsIf(), condition,
                                      true_target, false_target, always_true_target);
         return;
       }
@@ -1328,7 +1337,12 @@
         DCHECK(right.IsConstant());
         GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
       }
-      __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
+      if (can_jump_to_false) {
+        __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+        return;
+      }
+
+      __ b(true_target, ARMCondition(condition->GetCondition()));
     }
   }
   if (false_target != nullptr) {
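The can_jump_to_false path above rests on a simple identity: when the true successor is the fall-through block (always_true_target == nullptr), "if (c) goto T; goto F;" collapses to "if (!c) goto F;" with T reached by falling through, saving one branch. A minimal sketch of the reversal, with an illustrative condition enum standing in for ART's HCondition (an assumption, not the real type):

  enum Cond { kEq, kNe, kLt, kGe, kLe, kGt };
  Cond Opposite(Cond c) {  // The role GetOppositeCondition() plays above.
    switch (c) {
      case kEq: return kNe;  case kNe: return kEq;
      case kLt: return kGe;  case kGe: return kLt;
      case kLe: return kGt;  default:  return kLe;  // kGt
    }
  }
  // Emit: branch on Opposite(c) to false_target; the true block falls through.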
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8106499..959adb4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,12 +40,8 @@
 static constexpr Register kMethodRegisterArgument = A0;
 
 // We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr Register TMP = T8;
 static constexpr FRegister FTMP = F8;
 
-// ART Thread Register.
-static constexpr Register TR = S1;
-
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 55efd5f..b36a042 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,13 +16,13 @@
 
 #include "code_generator_mips64.h"
 
+#include "art_method.h"
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_mips64.h"
-#include "art_method.h"
-#include "code_generator_utils.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "offsets.h"
@@ -666,9 +666,19 @@
         gpr = destination.AsRegister<GpuRegister>();
       }
       if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
-        __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+        int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+        int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+        if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+          gpr = ZERO;
+        } else {
+          __ LoadConst64(gpr, value);
+        }
       }
       if (dst_type == Primitive::kPrimFloat) {
         __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +744,22 @@
       // Move to stack from constant
       HConstant* src_cst = source.GetConstant();
       StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+      GpuRegister gpr = ZERO;
       if (destination.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+        int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
       } else {
-        __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+        DCHECK(destination.IsDoubleStackSlot());
+        int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
       }
-      __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+      __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
     } else {
       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
       DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +775,7 @@
   }
 }
 
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
-                                        Location loc2,
-                                        Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
   DCHECK(!loc1.IsConstant());
   DCHECK(!loc2.IsConstant());
 
@@ -781,12 +799,16 @@
     // Swap 2 FPRs
     FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
     FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
-    // TODO: Can MOV.S/MOV.D be used here to save one instruction?
-    // Need to distinguish float from double, right?
-    __ Dmfc1(TMP, r2);
-    __ Dmfc1(AT, r1);
-    __ Dmtc1(TMP, r1);
-    __ Dmtc1(AT, r2);
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, r1);
+      __ MovS(r1, r2);
+      __ MovS(r2, FTMP);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, r1);
+      __ MovD(r1, r2);
+      __ MovD(r2, FTMP);
+    }
   } else if (is_slot1 != is_slot2) {
     // Swap GPR/FPR and stack slot
     Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +822,6 @@
                           reg_loc.AsFpuRegister<FpuRegister>(),
                           SP,
                           mem_loc.GetStackIndex());
-      // TODO: review this MTC1/DMTC1 move
       if (mem_loc.IsStackSlot()) {
         __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
       } else {
@@ -845,12 +866,22 @@
     } else {
       DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
       // Move to stack from constant
+      GpuRegister gpr = ZERO;
       if (location.IsStackSlot()) {
-        __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
-        __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+        int32_t value = GetInt32ValueOf(instruction->AsConstant());
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst32(gpr, value);
+        }
+        __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
       } else {
-        __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
-        __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+        DCHECK(location.IsDoubleStackSlot());
+        int64_t value = instruction->AsLongConstant()->GetValue();
+        if (value != 0) {
+          gpr = TMP;
+          __ LoadConst64(gpr, value);
+        }
+        __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
       }
     }
   } else if (instruction->IsTemporary()) {
@@ -1198,7 +1229,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -1707,7 +1738,7 @@
   switch (in_type) {
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1736,8 +1767,18 @@
     case Primitive::kPrimLong: {
       GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
       GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      // TODO: more efficient (direct) comparison with a constant
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      GpuRegister rhs = ZERO;
+      if (use_imm) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (value != 0) {
+          rhs = AT;
+          __ LoadConst64(rhs, value);
+        }
+      } else {
+        rhs = rhs_location.AsRegister<GpuRegister>();
+      }
       __ Slt(TMP, lhs, rhs);
       __ Slt(dst, rhs, lhs);
       __ Subu(dst, dst, TMP);
@@ -1902,6 +1943,252 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ Move(out, ZERO);
+  } else {
+    if (imm == -1) {
+      if (type == Primitive::kPrimInt) {
+        __ Subu(out, ZERO, dividend);
+      } else {
+        DCHECK_EQ(type, Primitive::kPrimLong);
+        __ Dsubu(out, ZERO, dividend);
+      }
+    } else if (out != dividend) {
+      __ Move(out, dividend);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
+  DCHECK(IsPowerOfTwo(abs_imm));
+  int ctz_imm = CTZ(abs_imm);
+
+  if (instruction->IsDiv()) {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Srl(TMP, dividend, 31);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+      }
+      __ Addu(out, dividend, TMP);
+      __ Sra(out, out, ctz_imm);
+      if (imm < 0) {
+        __ Subu(out, ZERO, out);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for division by +/-2, which is very common.
+        __ Dsrl32(TMP, dividend, 31);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+      }
+      __ Daddu(out, dividend, TMP);
+      if (ctz_imm < 32) {
+        __ Dsra(out, out, ctz_imm);
+      } else {
+        __ Dsra32(out, out, ctz_imm - 32);
+      }
+      if (imm < 0) {
+        __ Dsubu(out, ZERO, out);
+      }
+    }
+  } else {
+    if (type == Primitive::kPrimInt) {
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Sra(TMP, dividend, 31);
+        __ Subu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Addu(out, out, TMP);
+      } else {
+        __ Sra(TMP, dividend, 31);
+        __ Srl(TMP, TMP, 32 - ctz_imm);
+        __ Addu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          __ Sll(out, out, 32 - ctz_imm);
+          __ Srl(out, out, 32 - ctz_imm);
+        }
+        __ Subu(out, out, TMP);
+      }
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimLong);
+      if (ctz_imm == 1) {
+        // Fast path for modulo +/-2, which is very common.
+        __ Dsra32(TMP, dividend, 31);
+        __ Dsubu(out, dividend, TMP);
+        __ Andi(out, out, 1);
+        __ Daddu(out, out, TMP);
+      } else {
+        __ Dsra32(TMP, dividend, 31);
+        if (ctz_imm > 32) {
+          __ Dsrl(TMP, TMP, 64 - ctz_imm);
+        } else {
+          __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+        }
+        __ Daddu(out, dividend, TMP);
+        if (IsUint<16>(abs_imm - 1)) {
+          __ Andi(out, out, abs_imm - 1);
+        } else {
+          if (ctz_imm > 32) {
+            __ Dsll(out, out, 64 - ctz_imm);
+            __ Dsrl(out, out, 64 - ctz_imm);
+          } else {
+            __ Dsll32(out, out, 32 - ctz_imm);
+            __ Dsrl32(out, out, 32 - ctz_imm);
+          }
+        }
+        __ Dsubu(out, out, TMP);
+      }
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm,
+                                  (type == Primitive::kPrimLong),
+                                  &magic,
+                                  &shift);
+
+  if (type == Primitive::kPrimInt) {
+    __ LoadConst32(TMP, magic);
+    __ MuhR6(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Addu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Subu(TMP, TMP, dividend);
+    }
+
+    if (shift != 0) {
+      __ Sra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Sra(out, TMP, 31);
+      __ Subu(out, TMP, out);
+    } else {
+      __ Sra(AT, TMP, 31);
+      __ Subu(AT, TMP, AT);
+      __ LoadConst32(TMP, imm);
+      __ MulR6(TMP, AT, TMP);
+      __ Subu(out, dividend, TMP);
+    }
+  } else {
+    __ LoadConst64(TMP, magic);
+    __ Dmuh(TMP, dividend, TMP);
+
+    if (imm > 0 && magic < 0) {
+      __ Daddu(TMP, TMP, dividend);
+    } else if (imm < 0 && magic > 0) {
+      __ Dsubu(TMP, TMP, dividend);
+    }
+
+    if (shift >= 32) {
+      __ Dsra32(TMP, TMP, shift - 32);
+    } else if (shift > 0) {
+      __ Dsra(TMP, TMP, shift);
+    }
+
+    if (instruction->IsDiv()) {
+      __ Dsra32(out, TMP, 31);
+      __ Dsubu(out, TMP, out);
+    } else {
+      __ Dsra32(AT, TMP, 31);
+      __ Dsubu(AT, TMP, AT);
+      __ LoadConst64(TMP, imm);
+      __ Dmul(TMP, AT, TMP);
+      __ Dsubu(out, dividend, TMP);
+    }
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+  LocationSummary* locations = instruction->GetLocations();
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location second = locations->InAt(1);
+
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (IsPowerOfTwo(std::abs(imm))) {
+      DivRemByPowerOfTwo(instruction);
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
+  } else {
+    GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+    GpuRegister divisor = second.AsRegister<GpuRegister>();
+    if (instruction->IsDiv()) {
+      if (type == Primitive::kPrimInt)
+        __ DivR6(out, dividend, divisor);
+      else
+        __ Ddiv(out, dividend, divisor);
+    } else {
+      if (type == Primitive::kPrimInt)
+        __ ModR6(out, dividend, divisor);
+      else
+        __ Dmod(out, dividend, divisor);
+    }
+  }
+}
+
 void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1909,7 +2196,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -1931,16 +2218,9 @@
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ DivR6(dst, lhs, rhs);
-      else
-        __ Ddiv(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -2512,10 +2792,12 @@
   // allocation of a register for the current method pointer like on x86 baseline.
   // TODO: remove this once all the issues with register saving/restoring are
   // sorted out.
-  LocationSummary* locations = invoke->GetLocations();
-  Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
-  if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
-    locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+  if (invoke->HasCurrentMethodInput()) {
+    LocationSummary* locations = invoke->GetLocations();
+    Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+    if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+      locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+    }
   }
 }
 
@@ -2659,14 +2941,10 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
-    return;
-  }
-
+void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+  GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -2675,13 +2953,21 @@
   // temp = object->GetClass();
   DCHECK(receiver.IsRegister());
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -3108,7 +3394,7 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
@@ -3128,20 +3414,12 @@
 
 void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
   Primitive::Type type = instruction->GetType();
-  LocationSummary* locations = instruction->GetLocations();
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
-      GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-      GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
-      GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
-      if (type == Primitive::kPrimInt)
-        __ ModR6(dst, lhs, rhs);
-      else
-        __ Dmod(dst, lhs, rhs);
+    case Primitive::kPrimLong:
+      GenerateDivRemIntegral(instruction);
       break;
-    }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
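DivRemByPowerOfTwo above uses the standard correction for signed division by 2^k: an arithmetic shift alone rounds toward negative infinity, so a bias of 2^k - 1 is added to negative dividends first (the Sra/Srl pair materializes that bias from the sign bit). A self-contained sketch of the int32 division case, written to mirror the general (ctz_imm > 1) sequence rather than copied from it:

  #include <cstdint>

  int32_t DivByPow2(int32_t dividend, int k, bool negative_divisor) {
    int32_t bias = dividend >> 31;                     // Sra(TMP, dividend, 31)
    bias = static_cast<int32_t>(
        static_cast<uint32_t>(bias) >> (32 - k));      // Srl(TMP, TMP, 32 - k)
    int32_t out = (dividend + bias) >> k;              // Addu + Sra
    return negative_divisor ? -out : out;              // Subu(out, ZERO, out)
  }
  // DivByPow2(-7, 2, false) == -1, matching Java's truncating -7 / 4.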
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9bbd027..58c6e0f 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -230,6 +230,10 @@
                              Label* true_target,
                              Label* false_target,
                              Label* always_true_target);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
   Mips64Assembler* const assembler_;
@@ -333,10 +337,7 @@
       MethodReference target_method) OVERRIDE;
 
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
-                           Location temp ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(FATAL);
-  }
+  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 921c1d8..bf354e7 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_utils.h"
+#include "nodes.h"
 
 #include "base/logging.h"
 
@@ -94,4 +95,19 @@
   *shift = is_long ? p - 64 : p - 32;
 }
 
+// Is it valid to reverse the condition? Uses the values supplied to
+// GenerateTestAndBranch() in instruction generators.
+bool CanReverseCondition(Label* always_true_target,
+                         Label* false_target,
+                         HCondition* condition) {
+  // 'always_true_target' is null when the 'true' path is to the next
+  // block to be generated.  Check the type of the condition to ensure that
+  // FP conditions are not swapped.  This is for future fusing of HCompare and
+  // HCondition.
+  // Note:  If the condition is nullptr, then it is always okay to reverse.
+  return always_true_target == nullptr && false_target != nullptr &&
+         (condition == nullptr ||
+          !Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()));
+}
+
 }  // namespace art
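For divisors that are not powers of two, GenerateDivRemWithAnyConstant (in code_generator_mips64.cc above) multiplies by a precomputed reciprocal from CalculateMagicAndShiftForDivRem and then fixes up the result. A worked int32 example for divisor 7, using the well-known Hacker's Delight constants magic = 0x92492493 and shift = 2 (example values chosen here, not output captured from the function):

  #include <cstdint>

  int32_t DivBy7(int32_t n) {
    int32_t magic = static_cast<int32_t>(0x92492493);  // Negative as int32.
    int32_t q = static_cast<int32_t>(
        (static_cast<int64_t>(magic) * n) >> 32);      // MuhR6: high 32 bits of the product.
    q += n;                                            // imm > 0 and magic < 0, so Addu.
    q >>= 2;                                           // Sra(TMP, TMP, shift)
    return q - (q >> 31);                              // +1 for negative q: Sra + Subu.
  }
  // DivBy7(20) == 2 and DivBy7(-20) == -2, matching truncating division.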
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 59b495c..628eee8 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,10 +21,19 @@
 
 namespace art {
 
+class Label;
+class HCondition;
+
 // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out
 // arguments `magic` and `shift`
 void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
 
+// Is it valid to reverse the condition? Uses the values supplied to
+// GenerateTestAndBranch() in instruction generators.
+bool CanReverseCondition(Label* always_true_target,
+                         Label* false_target,
+                         HCondition* condition);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0df7e3b..8308d9e 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1216,16 +1216,21 @@
       DCHECK_EQ(cond_value, 0);
     }
   } else {
+    HCondition* condition = cond->AsCondition();
     bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
+        condition == nullptr || condition->NeedsMaterialization();
     // Moves do not affect the eflags register, so if the condition is
     // evaluated just before the if, we don't need to evaluate it
     // again.  We can't use the eflags on long/FP conditions if they are
     // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+    Primitive::Type type = (condition != nullptr)
+        ? cond->InputAt(0)->GetType()
+        : Primitive::kPrimInt;
+    bool eflags_set = condition != nullptr
+        && condition->IsBeforeWhenDisregardMoves(instruction)
         && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type));
+    // Can we optimize the jump if we know that the next block is the true case?
+    bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition);
     if (is_materialized) {
       if (!eflags_set) {
         // Materialized condition, compare against 0.
@@ -1235,9 +1240,17 @@
         } else {
           __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
         }
+        if (can_jump_to_false) {
+          __ j(kEqual, false_target);
+          return;
+        }
         __ j(kNotEqual, true_target);
       } else {
-        __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
+        if (can_jump_to_false) {
+          __ j(X86Condition(condition->GetOppositeCondition()), false_target);
+          return;
+        }
+        __ j(X86Condition(condition->GetCondition()), true_target);
       }
     } else {
       // Condition has not been materialized, use its inputs as the
@@ -1247,7 +1260,7 @@
       if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
         // Generate the comparison directly.
         GenerateCompareTestAndBranch(instruction->AsIf(),
-                                     cond->AsCondition(),
+                                     condition,
                                      true_target,
                                      false_target,
                                      always_true_target);
@@ -1270,7 +1283,13 @@
       } else {
         __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
-      __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
+
+      if (can_jump_to_false) {
+        __ j(X86Condition(condition->GetOppositeCondition()), false_target);
+        return;
+      }
+
+      __ j(X86Condition(condition->GetCondition()), true_target);
     }
   }
   if (false_target != nullptr) {
@@ -4043,16 +4062,16 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(1, Location::RegisterLocation(EAX));
   } else if (Primitive::IsFloatingPointType(field_type)) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
+    if (is_volatile && field_type == Primitive::kPrimDouble) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
+  } else if (is_volatile && field_type == Primitive::kPrimLong) {
+    // In order to satisfy the semantics of volatile, this must be a single instruction store.
     locations->SetInAt(1, Location::RequiresRegister());
-  }
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
-    // Ensure the card is in a byte register.
-    locations->AddTemp(Location::RegisterLocation(ECX));
-  } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+
     // 64bits value can be atomically written to an address with movsd and an XMM register.
     // We need two XMM registers because there's no easier way to (bit) copy a register pair
     // into a single XMM register (we copy each pair part into the XMMs and then interleave them).
@@ -4060,6 +4079,15 @@
     // isolated cases when we need this it isn't worth adding the extra complexity.
     locations->AddTemp(Location::RequiresFpuRegister());
     locations->AddTemp(Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+
+    if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());  // May be used for reference poisoning too.
+      // Ensure the card is in a byte register.
+      locations->AddTemp(Location::RegisterLocation(ECX));
+    }
   }
 }
 
@@ -4081,6 +4109,8 @@
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
@@ -4090,7 +4120,12 @@
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      __ movw(Address(base, offset), value.AsRegister<Register>());
+      if (value.IsConstant()) {
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movw(Address(base, offset), Immediate(v));
+      } else {
+        __ movw(Address(base, offset), value.AsRegister<Register>());
+      }
       break;
     }
 
@@ -4105,6 +4140,9 @@
         __ movl(temp, value.AsRegister<Register>());
         __ PoisonHeapReference(temp);
         __ movl(Address(base, offset), temp);
+      } else if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
       } else {
         __ movl(Address(base, offset), value.AsRegister<Register>());
       }
@@ -4120,21 +4158,40 @@
         __ punpckldq(temp1, temp2);
         __ movsd(Address(base, offset), temp1);
         codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
       } else {
         __ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
       }
+      maybe_record_implicit_null_check_done = true;
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -4143,8 +4200,7 @@
       UNREACHABLE();
   }
 
-  // Longs are handled in the switch.
-  if (field_type != Primitive::kPrimLong) {
+  if (!maybe_record_implicit_null_check_done) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
@@ -4481,7 +4537,7 @@
     // Ensure the value is in a byte register.
     locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
   } else if (Primitive::IsFloatingPointType(value_type)) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
@@ -4667,8 +4723,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4677,8 +4739,19 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<Register>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        Address address_hi = index.IsConstant() ?
+            Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                           offset + kX86WordSize) :
+            Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        __ movl(address, Immediate(Low32Bits(v)));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ movl(address_hi, Immediate(High32Bits(v)));
+      }
       break;
     }
 
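Two details in the x86 constant stores above deserve spelling out: a 64-bit constant cannot be stored in one instruction on 32-bit x86, so it is written as two movl immediates, and the implicit null check must be recorded right after the first movl, since that is the access that faults on a null base. This is also why the volatile kPrimLong/kPrimDouble cases still demand a register: the two-movl sequence is not a single atomic store. A sketch of the low/high split, assuming it mirrors the behavior of art's Low32Bits/High32Bits helpers:

  #include <cstdint>

  uint32_t Low32(int64_t v)  { return static_cast<uint32_t>(v); }
  uint32_t High32(int64_t v) { return static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32); }
  // movl [base + offset],     Immediate(Low32(v))    <- null check recorded here
  // movl [base + offset + 4], Immediate(High32(v))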
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5218d70..ee8a299 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1183,16 +1183,20 @@
       DCHECK_EQ(cond_value, 0);
     }
   } else {
-    bool is_materialized =
-        !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
+    HCondition* condition = cond->AsCondition();
+    bool is_materialized = condition == nullptr || condition->NeedsMaterialization();
     // Moves do not affect the eflags register, so if the condition is
     // evaluated just before the if, we don't need to evaluate it
     // again.  We can't use the eflags on FP conditions if they are
     // materialized due to the complex branching.
-    Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
-    bool eflags_set = cond->IsCondition()
-        && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
+    Primitive::Type type = (condition != nullptr)
+        ? cond->InputAt(0)->GetType()
+        : Primitive::kPrimInt;
+    bool eflags_set = condition != nullptr
+        && condition->IsBeforeWhenDisregardMoves(instruction)
         && !Primitive::IsFloatingPointType(type);
+    // Can we optimize the jump if we know that the next block is the true case?
+    bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition);
 
     if (is_materialized) {
       if (!eflags_set) {
@@ -1204,9 +1208,17 @@
           __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
                   Immediate(0));
         }
+        if (can_jump_to_false) {
+          __ j(kEqual, false_target);
+          return;
+        }
         __ j(kNotEqual, true_target);
       } else {
-        __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
+        if (can_jump_to_false) {
+          __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
+          return;
+        }
+        __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
       }
     } else {
       // Condition has not been materialized, use its inputs as the
@@ -1215,7 +1227,7 @@
       // Is this a long or FP comparison that has been folded into the HCondition?
       if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
         // Generate the comparison directly.
-        GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
+        GenerateCompareTestAndBranch(instruction->AsIf(), condition,
                                      true_target, false_target, always_true_target);
         return;
       }
@@ -1235,7 +1247,13 @@
         __ cmpl(lhs.AsRegister<CpuRegister>(),
                 Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
-      __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
+
+      if (can_jump_to_false) {
+        __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
+        return;
+      }
+
+      __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
     }
   }
   if (false_target != nullptr) {
@@ -2562,7 +2580,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       // We can use a leaq or addq if the constant can fit in an immediate.
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
@@ -2682,7 +2700,7 @@
     }
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -3755,14 +3773,25 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = field_info.GetFieldType();
+  bool is_volatile = field_info.IsVolatile();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+    }
   } else {
-    locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
+    if (is_volatile) {
+      // In order to satisfy the semantics of volatile, this must be a single instruction store.
+      locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
+    } else {
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+    }
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
@@ -3790,11 +3819,13 @@
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
+  bool maybe_record_implicit_null_check_done = false;
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
         __ movb(Address(base, offset), Immediate(v));
       } else {
         __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3805,7 +3836,7 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       if (value.IsConstant()) {
-        int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
         __ movw(Address(base, offset), Immediate(v));
       } else {
         __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3838,9 +3869,11 @@
     case Primitive::kPrimLong: {
       if (value.IsConstant()) {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(Address(base, offset), Immediate(v_32));
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
       } else {
         __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
       }
@@ -3848,12 +3881,28 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(Address(base, offset), Immediate(v));
+      } else {
+        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      if (value.IsConstant()) {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        codegen_->MoveInt64ToAddress(Address(base, offset),
+                                     Address(base, offset + sizeof(int32_t)),
+                                     v,
+                                     instruction);
+        maybe_record_implicit_null_check_done = true;
+      } else {
+        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+      }
       break;
     }
 
@@ -3862,7 +3911,9 @@
       UNREACHABLE();
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  if (!maybe_record_implicit_null_check_done) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
 
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
@@ -4170,13 +4221,9 @@
       may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
 
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(
-      1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  locations->SetInAt(2, Location::RequiresRegister());
-  if (value_type == Primitive::kPrimLong) {
-    locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
-  } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
-    locations->SetInAt(2, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
   } else {
     locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
   }
@@ -4330,13 +4377,15 @@
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
       if (value.IsRegister()) {
         __ movq(address, value.AsRegister<CpuRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
       } else {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        DCHECK(IsInt<32>(v));
-        int32_t v_32 = v;
-        __ movq(address, Immediate(v_32));
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
       }
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
 
@@ -4345,8 +4394,14 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movss(address, value.AsFpuRegister<XmmRegister>());
+      if (value.IsFpuRegister()) {
+        __ movss(address, value.AsFpuRegister<XmmRegister>());
+      } else {
+        DCHECK(value.IsConstant());
+        int32_t v =
+            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(address, Immediate(v));
+      }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       break;
     }
@@ -4356,9 +4411,18 @@
       Address address = index.IsConstant()
           ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
           : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
-      DCHECK(value.IsFpuRegister());
-      __ movsd(address, value.AsFpuRegister<XmmRegister>());
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
+      if (value.IsFpuRegister()) {
+        __ movsd(address, value.AsFpuRegister<XmmRegister>());
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else {
+        int64_t v =
+            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        Address address_high = index.IsConstant()
+            ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+                offset + sizeof(int32_t))
+            : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+        codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
+      }
       break;
     }
 
@@ -5564,6 +5628,24 @@
   return Address::RIP(table_fixup);
 }
 
+void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
+                                             const Address& addr_high,
+                                             int64_t v,
+                                             HInstruction* instruction) {
+  if (IsInt<32>(v)) {
+    int32_t v_32 = v;
+    __ movq(addr_low, Immediate(v_32));
+    MaybeRecordImplicitNullCheck(instruction);
+  } else {
+    // The value does not fit in a sign-extended 32-bit immediate. Store it in two 32-bit pieces.
+    int32_t low_v = Low32Bits(v);
+    int32_t high_v = High32Bits(v);
+    __ movl(addr_low, Immediate(low_v));
+    MaybeRecordImplicitNullCheck(instruction);
+    __ movl(addr_high, Immediate(high_v));
+  }
+}
+
 #undef __
 
 }  // namespace x86_64
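
// Illustrative aside (not part of the patch): how MoveInt64ToAddress decides
// between one store and two. For v = 0x123456789ABCDEF0, IsInt<32>(v) is false,
// so the value is written in two halves:
//   Low32Bits(v)  == 0x9ABCDEF0  ->  movl addr_low,  Immediate(0x9ABCDEF0)
//   High32Bits(v) == 0x12345678  ->  movl addr_high, Immediate(0x12345678)
// For v = -1, the value sign-extends from 32 bits, so a single
// movq addr_low, Immediate(-1) suffices; in both paths only the first store
// can fault, which is why the implicit null check is recorded right after it.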
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index fc485f5..7a52473 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -368,6 +368,12 @@
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
 
+  // Assign a 64-bit constant to an address.
+  void MoveInt64ToAddress(const Address& addr_low,
+                          const Address& addr_high,
+                          int64_t v,
+                          HInstruction* instruction);
+
  private:
   struct PcRelativeDexCacheAccessInfo {
     PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
new file mode 100644
index 0000000..b459d24
--- /dev/null
+++ b/compiler/optimizing/common_dominator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Helper class for finding common dominators of two or more blocks in a graph.
+// The domination information of a graph must not be modified while there is
+// a CommonDominator object in use, as its internal state could become invalid.
+class CommonDominator {
+ public:
+  // Convenience function to find the common dominator of 2 blocks.
+  static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) {
+    CommonDominator finder(block1);
+    finder.Update(block2);
+    return finder.Get();
+  }
+
+  // Create a finder starting with a given block.
+  explicit CommonDominator(HBasicBlock* block)
+      : dominator_(block), chain_length_(ChainLength(block)) {
+    DCHECK(block != nullptr);
+  }
+
+  // Update the common dominator with another block.
+  void Update(HBasicBlock* block) {
+    DCHECK(block != nullptr);
+    HBasicBlock* block2 = dominator_;
+    DCHECK(block2 != nullptr);
+    if (block == block2) {
+      return;
+    }
+    size_t chain_length = ChainLength(block);
+    size_t chain_length2 = chain_length_;
+    // Equalize the chain lengths.
+    for ( ; chain_length > chain_length2; --chain_length) {
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+    }
+    for ( ; chain_length2 > chain_length; --chain_length2) {
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    // Now run up the chain until we hit the common dominator.
+    while (block != block2) {
+      --chain_length;
+      block = block->GetDominator();
+      DCHECK(block != nullptr);
+      block2 = block2->GetDominator();
+      DCHECK(block2 != nullptr);
+    }
+    dominator_ = block;
+    chain_length_ = chain_length;
+  }
+
+  HBasicBlock* Get() const {
+    return dominator_;
+  }
+
+ private:
+  static size_t ChainLength(HBasicBlock* block) {
+    size_t result = 0;
+    while (block != nullptr) {
+      ++result;
+      block = block->GetDominator();
+    }
+    return result;
+  }
+
+  HBasicBlock* dominator_;
+  size_t chain_length_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
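
// Illustrative sketch (not part of the patch): intended use of CommonDominator,
// with b1..b3 as placeholder blocks whose dominator information is up to date.
HBasicBlock* CommonDominatorOfThree(HBasicBlock* b1, HBasicBlock* b2, HBasicBlock* b3) {
  HBasicBlock* pair_dom = CommonDominator::ForPair(b1, b2);  // convenience form for two blocks
  CommonDominator finder(b1);  // incremental form for three or more blocks
  finder.Update(b2);
  finder.Update(b3);
  DCHECK(finder.Get() == CommonDominator::ForPair(pair_dom, b3));  // associativity
  return finder.Get();
}
// Each Update() first equalizes the depths of the two dominator chains, then
// walks both chains up in lock-step until they meet, so the cost is linear in
// the depth of the dominator tree.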
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3de96b5..0d7c796 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -188,6 +188,21 @@
   VisitInstruction(try_boundary);
 }
 
+void GraphChecker::VisitLoadException(HLoadException* load) {
+  // Ensure that LoadException is the first instruction in a catch block.
+  if (!load->GetBlock()->IsCatchBlock()) {
+    AddError(StringPrintf("%s:%d is in a non-catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  } else if (load->GetBlock()->GetFirstInstruction() != load) {
+    AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+                          load->DebugName(),
+                          load->GetId(),
+                          load->GetBlock()->GetBlockId()));
+  }
+}
+
 void GraphChecker::VisitInstruction(HInstruction* instruction) {
   if (seen_ids_.IsBitSet(instruction->GetId())) {
     AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -242,10 +257,11 @@
     }
     size_t use_index = use_it.Current()->GetIndex();
     if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
-      AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+      AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
                             "UseListNode index.",
                             use->DebugName(),
                             use->GetId(),
+                            instruction->DebugName(),
                             instruction->GetId()));
     }
   }
@@ -445,12 +461,18 @@
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();
 
-  // Ensure the pre-header block is first in the list of
-  // predecessors of a loop header.
+  // Ensure the pre-header block is first in the list of predecessors of a loop
+  // header and that the header block is the pre-header's only successor.
   if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
     AddError(StringPrintf(
         "Loop pre-header is not the first predecessor of the loop header %d.",
         id));
+  } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
+    AddError(StringPrintf(
+        "Loop pre-header %d of loop defined by header %d has %zu successors.",
+        loop_information->GetPreHeader()->GetBlockId(),
+        id,
+        loop_information->GetPreHeader()->GetSuccessors().size()));
   }
 
   // Ensure the loop header has only one incoming branch and the remaining
@@ -493,6 +515,13 @@
             "Loop defined by header %d has an invalid back edge %d.",
             id,
             back_edge_id));
+      } else if (back_edge->GetLoopInformation() != loop_information) {
+        AddError(StringPrintf(
+            "Back edge %d of loop defined by header %d belongs to nested loop "
+            "with header %d.",
+            back_edge_id,
+            id,
+            back_edge->GetLoopInformation()->GetHeader()->GetBlockId()));
       }
     }
   }
@@ -531,10 +560,14 @@
        !use_it.Done(); use_it.Advance()) {
     HInstruction* use = use_it.Current()->GetUser();
     if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
-      AddError(StringPrintf("Instruction %d in block %d does not dominate "
-                            "use %d in block %d.",
-                            instruction->GetId(), current_block_->GetBlockId(),
-                            use->GetId(), use->GetBlock()->GetBlockId()));
+      AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
+                            "use %s:%d in block %d.",
+                            instruction->DebugName(),
+                            instruction->GetId(),
+                            current_block_->GetBlockId(),
+                            use->DebugName(),
+                            use->GetId(),
+                            use->GetBlock()->GetBlockId()));
     }
   }
 
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index abf3659..d5ddbab 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -50,6 +50,9 @@
   // Check successors of blocks ending in TryBoundary.
   void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
 
+  // Check that LoadException is the first instruction in a catch block.
+  void VisitLoadException(HLoadException* load) OVERRIDE;
+
   // Check that HCheckCast and HInstanceOf have HLoadClass as second input.
   void VisitCheckCast(HCheckCast* check) OVERRIDE;
   void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index b7262f6..5de94f4 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -69,10 +69,13 @@
     entry_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(entry_);
     BuildForLoop(0, n);
+    return_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(exit_);
     entry_->AddSuccessor(loop_preheader_[0]);
-    loop_header_[0]->AddSuccessor(exit_);
+    loop_header_[0]->AddSuccessor(return_);
+    return_->AddSuccessor(exit_);
     graph_->SetEntryBlock(entry_);
     graph_->SetExitBlock(exit_);
 
@@ -91,6 +94,7 @@
     entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_));
     dum_ = new (&allocator_) HLocal(n + 2);
     entry_->AddInstruction(dum_);
+    return_->AddInstruction(new (&allocator_) HReturnVoid());
     exit_->AddInstruction(new (&allocator_) HExit());
 
     // Provide loop instructions.
@@ -177,6 +181,7 @@
 
   // Fixed basic blocks and instructions.
   HBasicBlock* entry_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
   HInstruction* parameter_;  // "this"
   HInstruction* constant0_;
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5530d26..b40ef5a 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -75,10 +75,12 @@
   return v;
 }
 
-static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) {
-  DCHECK(preheader != nullptr);
+/** Helper method to insert an instruction. */
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+  DCHECK(block != nullptr);
+  DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
   DCHECK(instruction != nullptr);
-  preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction());
+  block->InsertInstructionBefore(instruction, block->GetLastInstruction());
   return instruction;
 }
 
@@ -91,48 +93,98 @@
   DCHECK(induction_analysis != nullptr);
 }
 
-InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return GetInduction(context, instruction, /* is_min */ true);
-}
-
-InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context,
-                                                            HInstruction* instruction) {
-  return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
+void InductionVarRange::GetInductionRange(HInstruction* context,
+                                          HInstruction* instruction,
+                                          /*out*/Value* min_val,
+                                          /*out*/Value* max_val,
+                                          /*out*/bool* needs_finite_test) {
+  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
+  if (loop != nullptr) {
+    // Set up loop information.
+    HBasicBlock* header = loop->GetHeader();
+    bool in_body = context->GetBlock() != header;
+    HInductionVarAnalysis::InductionInfo* info =
+        induction_analysis_->LookupInfo(loop, instruction);
+    HInductionVarAnalysis::InductionInfo* trip =
+        induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
+    // Find range.
+    *min_val = GetVal(info, trip, in_body, /* is_min */ true);
+    *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+    *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+  } else {
+    // No loop to analyze.
+    *min_val = Value();
+    *max_val = Value();
+    *needs_finite_test = false;
+  }
 }
 
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
-                                        /*out*/bool* top_test) {
-  return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test);
+                                        /*out*/bool* needs_finite_test,
+                                        /*out*/bool* needs_taken_test) {
+  return GenerateCode(context,
+                      instruction,
+                      nullptr, nullptr, nullptr, nullptr, nullptr,  // nothing generated yet
+                      needs_finite_test,
+                      needs_taken_test);
 }
 
-bool InductionVarRange::GenerateCode(HInstruction* context,
-                                     HInstruction* instruction,
-                                     HGraph* graph,
-                                     HBasicBlock* block,
-                                     /*out*/HInstruction** lower,
-                                     /*out*/HInstruction** upper) {
-  return GenerateCode(context, instruction, graph, block, lower, upper, nullptr);
+void InductionVarRange::GenerateRangeCode(HInstruction* context,
+                                          HInstruction* instruction,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** lower,
+                                          /*out*/HInstruction** upper) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
+}
+
+void InductionVarRange::GenerateTakenTest(HInstruction* context,
+                                          HGraph* graph,
+                                          HBasicBlock* block,
+                                          /*out*/HInstruction** taken_test) {
+  bool b1, b2;  // unused
+  if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) {
+    LOG(FATAL) << "Failed precondition: GenerateCode()";
+  }
 }
 
 //
 // Private class methods.
 //
 
-InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context,
-                                                         HInstruction* instruction,
-                                                         bool is_min) {
-  HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
-  if (loop != nullptr) {
-    HBasicBlock* header = loop->GetHeader();
-    bool in_body = context->GetBlock() != header;
-    return GetVal(induction_analysis_->LookupInfo(loop, instruction),
-                  induction_analysis_->LookupInfo(loop, header->GetLastInstruction()),
-                  in_body,
-                  is_min);
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+  if (info != nullptr) {
+    if (info->induction_class == HInductionVarAnalysis::kLinear) {
+      return true;
+    } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) {
+      return NeedsTripCount(info->op_b);
+    }
   }
-  return Value();
+  return false;
+}
+
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
+             trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe;
+    }
+  }
+  return false;
+}
+
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+  if (trip != nullptr) {
+    if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+      return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
+             trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe;
+    }
+  }
+  return false;
 }
 
 InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
@@ -184,11 +236,13 @@
           case HInductionVarAnalysis::kFetch:
             return GetFetch(info->fetch, trip, in_body, is_min);
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
               return GetVal(info->op_a, trip, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               return Value(0);
             } else if (in_body) {
@@ -356,25 +410,42 @@
                                      HBasicBlock* block,
                                      /*out*/HInstruction** lower,
                                      /*out*/HInstruction** upper,
-                                     /*out*/bool* top_test) {
+                                     /*out*/HInstruction** taken_test,
+                                     /*out*/bool* needs_finite_test,
+                                     /*out*/bool* needs_taken_test) {
   HLoopInformation* loop = context->GetBlock()->GetLoopInformation();  // closest enveloping loop
   if (loop != nullptr) {
+    // Set up loop information.
     HBasicBlock* header = loop->GetHeader();
     bool in_body = context->GetBlock() != header;
-    HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
+    HInductionVarAnalysis::InductionInfo* info =
+        induction_analysis_->LookupInfo(loop, instruction);
+    if (info == nullptr) {
+      return false;  // nothing to analyze
+    }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    if (info != nullptr && trip != nullptr) {
-      if (top_test != nullptr) {
-        *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop;
+    // Determine what tests are needed.
+    *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+    *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip);
+    // Code generation for the taken test: generate the code when requested, or otherwise
+    // analyze whether code generation would be feasible when a taken test is needed.
+    if (taken_test != nullptr) {
+      return GenerateCode(
+          trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false);
+    } else if (*needs_taken_test) {
+      if (!GenerateCode(
+          trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) {
+        return false;
       }
-      return
+    }
+    // Code generation for lower and upper.
+    return
         // Success on lower if invariant (not set), or code can be generated.
         ((info->induction_class == HInductionVarAnalysis::kInvariant) ||
             GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
         // And success on upper.
         GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
-    }
   }
   return false;
 }
@@ -387,19 +458,38 @@
                                      bool in_body,
                                      bool is_min) {
   if (info != nullptr) {
+    // Handle current operation.
     Primitive::Type type = Primitive::kPrimInt;
     HInstruction* opa = nullptr;
     HInstruction* opb = nullptr;
-    int32_t value = 0;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
         // Invariants.
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
+          case HInductionVarAnalysis::kLT:
+          case HInductionVarAnalysis::kLE:
+          case HInductionVarAnalysis::kGT:
+          case HInductionVarAnalysis::kGE:
             if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
                 GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
               if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+                HInstruction* operation = nullptr;
+                switch (info->operation) {
+                  case HInductionVarAnalysis::kAdd:
+                    operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kLT:
+                    operation = new (graph->GetArena()) HLessThan(opa, opb); break;
+                  case HInductionVarAnalysis::kLE:
+                    operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break;
+                  case HInductionVarAnalysis::kGT:
+                    operation = new (graph->GetArena()) HGreaterThan(opa, opb); break;
+                  case HInductionVarAnalysis::kGE:
+                    operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break;
+                  default:
+                    LOG(FATAL) << "unknown operation";
+                }
+                *result = Insert(block, operation);
               }
               return true;
             }
@@ -427,11 +517,13 @@
             }
             return true;
           case HInductionVarAnalysis::kTripCountInLoop:
+          case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
               return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min);
             }
             FALLTHROUGH_INTENDED;
           case HInductionVarAnalysis::kTripCountInBody:
+          case HInductionVarAnalysis::kTripCountInBodyUnsafe:
             if (is_min) {
               if (graph != nullptr) {
                 *result = graph->GetIntConstant(0);
@@ -452,23 +544,31 @@
             break;
         }
         break;
-      case HInductionVarAnalysis::kLinear:
-        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
-        // to avoid arithmetic wrap-around situations that are hard to guard against.
-        if (GetConstant(info->op_a, &value)) {
-          if (value == 1 || value == -1) {
-            const bool is_min_a = value == 1 ? is_min : !is_min;
-            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-              if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+      case HInductionVarAnalysis::kLinear: {
+          // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+          // to avoid arithmetic wrap-around situations that are hard to guard against.
+          int32_t stride_value = 0;
+          if (GetConstant(info->op_a, &stride_value)) {
+            if (stride_value == 1 || stride_value == -1) {
+              const bool is_min_a = stride_value == 1 ? is_min : !is_min;
+              if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                  GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+                if (graph != nullptr) {
+                  HInstruction* oper;
+                  if (stride_value == 1) {
+                    oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                  } else {
+                    oper = new (graph->GetArena()) HSub(type, opb, opa);
+                  }
+                  *result = Insert(block, oper);
+                }
+                return true;
               }
-              return true;
             }
           }
         }
         break;
-      default:  // TODO(ajcbik): add more cases
+      default:
         break;
     }
   }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7fa5a26..7984871 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,29 +57,33 @@
   explicit InductionVarRange(HInductionVarAnalysis* induction);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, lower bound on the instruction's value.
+   * Given a context denoted by the first instruction, returns a possibly conservative
+   * lower and upper bound on the instruction's value in the output parameters min_val
+   * and max_val, respectively. The needs_finite_test flag denotes whether an additional
+   * finite-test is needed to protect the range evaluation inside its loop.
    */
-  Value GetMinInduction(HInstruction* context, HInstruction* instruction);
+  void GetInductionRange(HInstruction* context,
+                         HInstruction* instruction,
+                         /*out*/Value* min_val,
+                         /*out*/Value* max_val,
+                         /*out*/bool* needs_finite_test);
 
   /**
-   * Given a context denoted by the first instruction, returns a,
-   * possibly conservative, upper bound on the instruction's value.
+   * Returns true if range analysis is able to generate code for the lower and upper
+   * bound expressions on the instruction in the given context. The needs_finite_test
+   * and needs_taken_test flags denote whether an additional finite-test and/or taken-test
+   * are needed to protect the range evaluation inside its loop.
    */
-  Value GetMaxInduction(HInstruction* context, HInstruction* instruction);
-
-  /**
-   * Returns true if range analysis is able to generate code for the lower and upper bound
-   * expressions on the instruction in the given context. Output parameter top_test denotes
-   * whether a top test is needed to protect the trip-count expression evaluation.
-   */
-  bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test);
+  bool CanGenerateCode(HInstruction* context,
+                       HInstruction* instruction,
+                       /*out*/bool* needs_finite_test,
+                       /*out*/bool* needs_taken_test);
 
   /**
    * Generates the actual code in the HIR for the lower and upper bound expressions on the
    * instruction in the given context. Code for the lower and upper bound expression are
-   * generated in given block and graph and are returned in lower and upper, respectively.
-   * For a loop invariant, lower is not set.
+   * generated in the given block and graph and are returned in the output parameters lower and
+   * upper, respectively. For a loop invariant, lower is not set.
    *
    * For example, given expression x+i with range [0, 5] for i, calling this method
    * will generate the following sequence:
@@ -87,20 +91,35 @@
    * block:
    *   lower: add x, 0
    *   upper: add x, 5
+   *
+   * Precondition: CanGenerateCode() returns true.
    */
-  bool GenerateCode(HInstruction* context,
-                    HInstruction* instruction,
-                    HGraph* graph,
-                    HBasicBlock* block,
-                    /*out*/HInstruction** lower,
-                    /*out*/HInstruction** upper);
+  void GenerateRangeCode(HInstruction* context,
+                         HInstruction* instruction,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/HInstruction** lower,
+                         /*out*/HInstruction** upper);
+
+  /**
+   * Generates an explicit taken-test for the loop in the given context. Code is generated
+   * in the given block and graph. The taken-test is returned in the parameter taken_test.
+   *
+   * Precondition: CanGenerateCode() returns true and needs_taken_test is set.
+   */
+  void GenerateTakenTest(HInstruction* context,
+                         HGraph* graph,
+                         HBasicBlock* block,
+                         /*out*/HInstruction** taken_test);
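
// Illustrative sketch (not part of the patch): the calling protocol implied by
// the comments above; all names are placeholders and CanGenerateCode() is the
// stated precondition for both generators.
void SketchUse(InductionVarRange* range,
               HInstruction* context,
               HInstruction* instruction,
               HGraph* graph,
               HBasicBlock* block) {
  bool needs_finite_test = false;
  bool needs_taken_test = false;
  if (range->CanGenerateCode(context, instruction, &needs_finite_test, &needs_taken_test)) {
    HInstruction* lower = nullptr;
    HInstruction* upper = nullptr;
    range->GenerateRangeCode(context, instruction, graph, block, &lower, &upper);
    if (needs_taken_test) {
      HInstruction* taken = nullptr;
      range->GenerateTakenTest(context, graph, block, &taken);
      // The caller must guard use of the generated bounds with 'taken'.
    }
    // If needs_finite_test is set, the caller must additionally protect the
    // evaluation against potentially infinite loops.
  }
}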
 
  private:
   //
   // Private helper methods.
   //
 
-  Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min);
+  static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info);
+  static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip);
+  static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip);
 
   static Value GetFetch(HInstruction* instruction,
                         HInductionVarAnalysis::InductionInfo* trip,
@@ -130,8 +149,8 @@
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /**
-   * Generates code for lower/upper expression in the HIR. Returns true on success.
-   * With graph == nullptr, the method can be used to determine if code generation
+   * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
+   * With nullptr values, the method can be used to determine whether code generation
    * would be successful without generating actual code yet.
    */
   bool GenerateCode(HInstruction* context,
@@ -140,7 +159,9 @@
                     HBasicBlock* block,
                     /*out*/HInstruction** lower,
                     /*out*/HInstruction** upper,
-                    bool* top_test);
+                    /*out*/HInstruction** taken_test,
+                    /*out*/bool* needs_finite_test,
+                    /*out*/bool* needs_taken_test);
 
   static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                            HInductionVarAnalysis::InductionInfo* trip,
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index ce8926a..c2ba157 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -46,6 +46,10 @@
     EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
+  //
+  // Construction methods.
+  //
+
   /** Constructs bare minimum graph. */
   void BuildGraph() {
     graph_->SetNumberOfVRegs(1);
@@ -58,7 +62,7 @@
   }
 
   /** Constructs a loop with the given lower bound, upper bound, and stride. */
-  void BuildLoop(HInstruction* upper) {
+  void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) {
     // Control flow.
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_preheader_);
@@ -66,29 +70,37 @@
     graph_->AddBlock(loop_header);
     HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
     graph_->AddBlock(loop_body);
+    HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(return_block);
     entry_block_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header);
     loop_header->AddSuccessor(loop_body);
-    loop_header->AddSuccessor(exit_block_);
+    loop_header->AddSuccessor(return_block);
     loop_body->AddSuccessor(loop_header);
+    return_block->AddSuccessor(exit_block_);
     // Instructions.
     HLocal* induc = new (&allocator_) HLocal(0);
     entry_block_->AddInstruction(induc);
     loop_preheader_->AddInstruction(
-        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0)));  // i = 0
+        new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower)));  // i = lower
     loop_preheader_->AddInstruction(new (&allocator_) HGoto());
     HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
     loop_header->AddInstruction(load);
-    condition_ = new (&allocator_) HLessThan(load, upper);
+    if (stride > 0) {
+      condition_ = new (&allocator_) HLessThan(load, upper);  // i < upper
+    } else {
+      condition_ = new (&allocator_) HGreaterThan(load, upper);  // i > upper
+    }
     loop_header->AddInstruction(condition_);
-    loop_header->AddInstruction(new (&allocator_) HIf(condition_));  // i < u
+    loop_header->AddInstruction(new (&allocator_) HIf(condition_));
     load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
     loop_body->AddInstruction(load);
-    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1));
+    increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride));
     loop_body->AddInstruction(increment_);
-    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i++
+    loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_));  // i += stride
     loop_body->AddInstruction(new (&allocator_) HGoto());
-    exit_block_->AddInstruction(new (&allocator_) HReturnVoid());
+    return_block->AddInstruction(new (&allocator_) HReturnVoid());
+    exit_block_->AddInstruction(new (&allocator_) HExit());
   }
 
   /** Performs induction variable analysis. */
@@ -124,8 +136,20 @@
   }
 
   /** Constructs a trip-count. */
-  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
-    return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+  HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
+    if (in_loop && safe) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+    } else if (in_loop) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr);
+    } else if (safe) {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr);
+    } else {
+      return iva_->CreateTripCount(
+          HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr);
+    }
   }
 
   /** Constructs a linear a * i + b induction. */
@@ -139,16 +163,34 @@
         HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi));
   }
 
+  /** Constructs a wrap-around induction consisting of a constant, followed by the given info. */
+  HInductionVarAnalysis::InductionInfo* CreateWrapAround(
+      int32_t initial,
+      HInductionVarAnalysis::InductionInfo* info) {
+    return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info);
+  }
+
   /** Constructs a wrap-around induction consisting of a constant, followed by a range. */
   HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) {
-    return iva_->CreateInduction(
-        HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi));
+    return CreateWrapAround(initial, CreateRange(lo, hi));
   }
 
   //
   // Relay methods.
   //
 
+  bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+    return InductionVarRange::NeedsTripCount(info);
+  }
+
+  bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return InductionVarRange::IsBodyTripCount(trip);
+  }
+
+  bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+    return InductionVarRange::IsUnsafeTripCount(trip);
+  }
+
   Value GetMin(HInductionVarAnalysis::InductionInfo* info,
                HInductionVarAnalysis::InductionInfo* induc) {
     return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
@@ -202,6 +244,26 @@
 // Tests on static methods.
 //
 
+TEST_F(InductionVarRangeTest, TripCountProperties) {
+  EXPECT_FALSE(NeedsTripCount(nullptr));
+  EXPECT_FALSE(NeedsTripCount(CreateConst(1)));
+  EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1)));
+  EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3)));
+  EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1))));
+
+  EXPECT_FALSE(IsBodyTripCount(nullptr));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true)));
+  EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false)));
+
+  EXPECT_FALSE(IsUnsafeTripCount(nullptr));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false)));
+  EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true)));
+  EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false)));
+}
+
 TEST_F(InductionVarRangeTest, GetMinMaxNull) {
   ExpectEqual(Value(), GetMin(nullptr, nullptr));
   ExpectEqual(Value(), GetMax(nullptr, nullptr));
@@ -279,10 +341,10 @@
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
-  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100)));
-  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100)));
+  ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
+  ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
 }
 
 TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) {
@@ -398,61 +460,98 @@
 // Tests on instance methods.
 //
 
-TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) {
-  BuildLoop(graph_->GetIntConstant(1000));
+TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
+  BuildLoop(0, graph_->GetIntConstant(1000), 1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
+  Value v1, v2;
+  bool needs_finite_test = true;
+
   // In context of header: known.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_));
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
 }
 
-TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) {
-  HInstruction* parameter = new (&allocator_) HParameterValue(
-      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
-  entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
+  BuildLoop(1000, graph_->GetIntConstant(0), -1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
-  // In context of header: full range unknown.
-  ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
-  ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+  Value v1, v2;
+  bool needs_finite_test = true;
+
+  // In context of header: known.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(1000), v2);
 
   // In context of loop-body: known.
-  ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0)));
-  ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
-  ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_));
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(1000), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(999), v2);
 }
 
-TEST_F(InductionVarRangeTest, CodeGeneration) {
+TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
   HInstruction* parameter = new (&allocator_) HParameterValue(
       graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
   entry_block_->AddInstruction(parameter);
-  BuildLoop(parameter);
+  BuildLoop(0, parameter, 1);
   PerformInductionVarAnalysis();
   InductionVarRange range(iva_);
 
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: upper unknown.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(), v2);
+
+  // In context of loop-body: known.
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(0), v1);
+  ExpectEqual(Value(parameter, 1, -1), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(1), v1);
+  ExpectEqual(Value(parameter, 1, 0), v2);
+
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
-  bool top_test = false;
+  HInstruction* taken = nullptr;
 
   // Can generate code in context of loop-body only.
-  EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test));
-  ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test));
-  EXPECT_TRUE(top_test);
+  EXPECT_FALSE(range.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
 
   // Generates code.
-  EXPECT_TRUE(range.GenerateCode(
-      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper));
+  range.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
 
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
@@ -462,7 +561,7 @@
   ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
 
-  // Verify upper is (V-1)+0
+  // Verify upper is (V-1)+0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsAdd());
   ASSERT_TRUE(upper->InputAt(0)->IsSub());
@@ -471,6 +570,91 @@
   EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
   ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
   EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 0<V.
+  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsLessThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+}
+
+TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
+  HInstruction* parameter = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+  entry_block_->AddInstruction(parameter);
+  BuildLoop(1000, parameter, -1);
+  PerformInductionVarAnalysis();
+  InductionVarRange range(iva_);
+
+  Value v1, v2;
+  bool needs_finite_test = true;
+  bool needs_taken_test = true;
+
+  // In context of header: lower unknown.
+  range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(), v1);
+  ExpectEqual(Value(1000), v2);
+
+  // In context of loop-body: known.
+  range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(parameter, 1, 1), v1);
+  ExpectEqual(Value(1000), v2);
+  range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+  EXPECT_FALSE(needs_finite_test);
+  ExpectEqual(Value(parameter, 1, 0), v1);
+  ExpectEqual(Value(999), v2);
+
+  HInstruction* lower = nullptr;
+  HInstruction* upper = nullptr;
+  HInstruction* taken = nullptr;
+
+  // Can generate code in context of loop-body only.
+  EXPECT_FALSE(range.CanGenerateCode(
+      condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  ASSERT_TRUE(range.CanGenerateCode(
+      increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+  EXPECT_FALSE(needs_finite_test);
+  EXPECT_TRUE(needs_taken_test);
+
+  // Generates code.
+  range.GenerateRangeCode(
+      increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+
+  // Verify lower is 1000-(-(V-1000)-1).
+  ASSERT_TRUE(lower != nullptr);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(1);
+  ASSERT_TRUE(lower->IsSub());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+  lower = lower->InputAt(0);
+  ASSERT_TRUE(lower->IsNeg());
+  lower = lower->InputAt(0);
+  ASSERT_TRUE(lower->IsSub());
+  EXPECT_TRUE(lower->InputAt(0)->IsParameterValue());
+  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify upper is 1000-0.
+  ASSERT_TRUE(upper != nullptr);
+  ASSERT_TRUE(upper->IsSub());
+  ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
+  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
+  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+  // Verify taken-test is 1000>V.
+  range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+  ASSERT_TRUE(taken != nullptr);
+  ASSERT_TRUE(taken->IsGreaterThan());
+  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+  EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+  EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dbe7524..b01324e 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -89,10 +89,7 @@
   }
 }
 
-static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
-  if (instruction_set == kMips) {
-    return Intrinsics::kNone;
-  }
+static Intrinsics GetIntrinsic(InlineMethod method) {
   switch (method.opcode) {
     // Floating-point conversions.
     case kIntrinsicDoubleCvt:
@@ -431,7 +428,7 @@
         DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
         DCHECK(inliner != nullptr);
         if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
-          Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet());
+          Intrinsics intrinsic = GetIntrinsic(method);
 
           if (intrinsic != Intrinsics::kNone) {
             if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5efcf4e..a94e3a8 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -138,6 +138,108 @@
 
 #define __ assembler->
 
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register str = locations->InAt(0).AsRegister<Register>();
+  Register arg = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+  MipsLabel loop;
+  MipsLabel end;
+  MipsLabel return_true;
+  MipsLabel return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can short-circuit
+  // the logic to a true result.
+  if (str == arg) {
+    __ LoadConst32(out, 1);
+    return;
+  }
+
+  // Check if input is null, return false if it is.
+  __ Beqz(arg, &return_false);
+
+  // Reference equality check, return true if same reference.
+  __ Beq(str, arg, &return_true);
+
+  // Instanceof check for the argument by comparing class fields.
+  // All string objects must have the same type since String cannot be subclassed.
+  // Receiver must be a string object, so its class field is equal to all strings' class fields.
+  // If the argument is a string object, its class field must be equal to receiver's class field.
+  __ Lw(temp1, str, class_offset);
+  __ Lw(temp2, arg, class_offset);
+  __ Bne(temp1, temp2, &return_false);
+
+  // Load lengths of this and argument strings.
+  __ Lw(temp1, str, count_offset);
+  __ Lw(temp2, arg, count_offset);
+  // Check if lengths are equal, return false if they're not.
+  __ Bne(temp1, temp2, &return_false);
+  // Return true if both strings are empty.
+  __ Beqz(temp1, &return_true);
+
+  // Don't overwrite input registers.
+  __ Move(TMP, str);
+  __ Move(temp3, arg);
+
+  // Assertions that must hold in order to compare strings 2 characters at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+  // Loop to compare strings 2 characters at a time starting at the beginning of the string.
+  // Ok to do this because strings are zero-padded.
+  __ Bind(&loop);
+  __ Lw(out, TMP, value_offset);
+  __ Lw(temp2, temp3, value_offset);
+  __ Bne(out, temp2, &return_false);
+  __ Addiu(TMP, TMP, 4);
+  __ Addiu(temp3, temp3, 4);
+  __ Addiu(temp1, temp1, -2);
+  __ Bgtz(temp1, &loop);
+
+  // Return true and exit the function.
+  // If the loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ LoadConst32(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ LoadConst32(out, 0);
+  __ Bind(&end);
+}
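
// Illustrative aside (not part of the patch): the comparison loop above handles
// one 32-bit word (two chars) per iteration and decrements the remaining length
// in temp1 by 2. For a 5-char string, temp1 steps 5 -> 3 -> 1 -> -1, so the last
// iteration also compares one padding halfword, which is zero in both strings
// because object sizes are 4-byte aligned and the tail is zero-padded.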
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                  \
@@ -204,7 +306,6 @@
 UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(StringCharAt)
 UNIMPLEMENTED_INTRINSIC(StringCompareTo)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 05c7eb0..ff843eb 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -101,11 +101,10 @@
     if (invoke_->IsInvokeStaticOrDirect()) {
       codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
                                           Location::RegisterLocation(A0));
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     } else {
-      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
-      UNREACHABLE();
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
     }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 14c65c9..a29f3ef 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1605,7 +1605,7 @@
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
+  locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
 }
 
 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 47457de..2bb769a 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -42,12 +42,14 @@
     loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
     loop_header_ = new (&allocator_) HBasicBlock(graph_);
     loop_body_ = new (&allocator_) HBasicBlock(graph_);
+    return_ = new (&allocator_) HBasicBlock(graph_);
     exit_ = new (&allocator_) HBasicBlock(graph_);
 
     graph_->AddBlock(entry_);
     graph_->AddBlock(loop_preheader_);
     graph_->AddBlock(loop_header_);
     graph_->AddBlock(loop_body_);
+    graph_->AddBlock(return_);
     graph_->AddBlock(exit_);
 
     graph_->SetEntryBlock(entry_);
@@ -57,8 +59,9 @@
     entry_->AddSuccessor(loop_preheader_);
     loop_preheader_->AddSuccessor(loop_header_);
     loop_header_->AddSuccessor(loop_body_);
-    loop_header_->AddSuccessor(exit_);
+    loop_header_->AddSuccessor(return_);
     loop_body_->AddSuccessor(loop_header_);
+    return_->AddSuccessor(exit_);
 
     // Provide boiler-plate instructions.
     parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
@@ -89,6 +92,7 @@
   HBasicBlock* loop_preheader_;
   HBasicBlock* loop_header_;
   HBasicBlock* loop_body_;
+  HBasicBlock* return_;
   HBasicBlock* exit_;
 
   HInstruction* parameter_;  // "this"
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ebdf7a2..1ab206f 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -17,6 +17,7 @@
 #include "locations.h"
 
 #include "nodes.h"
+#include "code_generator.h"
 
 namespace art {
 
@@ -47,18 +48,26 @@
       : Location::RequiresRegister();
 }
 
-Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
-  if (instruction->IsIntConstant() || instruction->IsNullConstant()) {
-    return Location::ConstantLocation(instruction->AsConstant());
-  } else if (instruction->IsLongConstant()) {
-    // Does the long constant fit in a 32 bit int?
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    return IsInt<32>(value)
-        ? Location::ConstantLocation(instruction->AsConstant())
-        : Location::RequiresRegister();
-  } else {
-    return Location::RequiresRegister();
+Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
   }
+  return Location::RequiresRegister();
+}
+
+Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
+  HConstant* constant = instruction->AsConstant();
+  if (constant != nullptr) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+    if (IsInt<32>(value)) {
+      return Location::ConstantLocation(constant);
+    }
+  }
+  return Location::RequiresFpuRegister();
 }
 
 Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
@@ -67,6 +76,12 @@
       : Location::RegisterLocation(reg);
 }
 
+Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresFpuRegister();
+}
+
 std::ostream& operator<<(std::ostream& os, const Location& location) {
   os << location.DebugString();
   if (location.IsRegister() || location.IsFpuRegister()) {
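A minimal, self-contained model of the decision rule shared by the two new Int32 helpers, assuming only the IsInt<32> semantics used above (the enum and function below stand in for ART's Location and HConstant types, which are not reproduced here):

    #include <cstdint>
    #include <limits>

    enum class Loc { kConstant, kRequiresRegister };

    // Mirrors RegisterOrInt32Constant(): encode the constant directly only
    // if its 64-bit value fits in a signed 32-bit immediate.
    Loc RegisterOrInt32Constant(bool is_constant, int64_t value) {
      if (is_constant &&
          value >= std::numeric_limits<int32_t>::min() &&
          value <= std::numeric_limits<int32_t>::max()) {
        return Loc::kConstant;
      }
      return Loc::kRequiresRegister;
    }

FpuRegisterOrInt32Constant() applies the same test but falls back to RequiresFpuRegister().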
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d014379..1181007 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -354,8 +354,10 @@
   }
 
   static Location RegisterOrConstant(HInstruction* instruction);
-  static Location RegisterOrInt32LongConstant(HInstruction* instruction);
+  static Location RegisterOrInt32Constant(HInstruction* instruction);
   static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
+  static Location FpuRegisterOrConstant(HInstruction* instruction);
+  static Location FpuRegisterOrInt32Constant(HInstruction* instruction);
 
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 68fb0ac..de3f266 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,7 @@
 #include "nodes.h"
 
 #include "code_generator.h"
+#include "common_dominator.h"
 #include "ssa_builder.h"
 #include "base/bit_vector-inl.h"
 #include "base/bit_utils.h"
@@ -179,7 +180,10 @@
       if (successor->GetDominator() == nullptr) {
         successor->SetDominator(current);
       } else {
-        successor->SetDominator(FindCommonDominator(successor->GetDominator(), current));
+        // The CommonDominator finder can work on multiple blocks as long as the
+        // dominator information does not change. However, since we are updating
+        // that information here, we can use the finder only for pairs of blocks.
+        successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current));
       }
 
       // Once all the forward edges have been visited, we know the immediate
@@ -194,24 +198,6 @@
   }
 }
 
-HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const {
-  ArenaBitVector visited(arena_, blocks_.size(), false);
-  // Walk the dominator tree of the first block and mark the visited blocks.
-  while (first != nullptr) {
-    visited.SetBit(first->GetBlockId());
-    first = first->GetDominator();
-  }
-  // Walk the dominator tree of the second block until a marked block is found.
-  while (second != nullptr) {
-    if (visited.IsBitSet(second->GetBlockId())) {
-      return second;
-    }
-    second = second->GetDominator();
-  }
-  LOG(ERROR) << "Could not find common dominator";
-  return nullptr;
-}
-
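For reference, the deleted FindCommonDominator above marked one dominator chain in an arena bit vector and walked the other until it hit a mark. A self-contained sketch of the same pairwise semantics (illustration only; common_dominator.h is the authoritative implementation):

    #include <unordered_set>

    struct Block {
      Block* dominator = nullptr;  // Immediate dominator; nullptr for the entry.
    };

    Block* CommonDominatorForPair(Block* first, Block* second) {
      std::unordered_set<Block*> chain;
      // Mark every block on the dominator chain of `first`.
      for (Block* b = first; b != nullptr; b = b->dominator) {
        chain.insert(b);
      }
      // The first marked block on the chain of `second` is the common dominator.
      for (Block* b = second; b != nullptr; b = b->dominator) {
        if (chain.count(b) != 0) {
          return b;
        }
      }
      return nullptr;  // No common dominator (malformed graph).
    }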
 void HGraph::TransformToSsa() {
   DCHECK(!reverse_post_order_.empty());
   SsaBuilder ssa_builder(this);
@@ -335,14 +321,24 @@
       // instructions into `normal_block` and links the two blocks with a Goto.
       // Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
       // leaving `catch_block` with the exceptional edges only.
+      //
       // Note that catch blocks with normal-flow predecessors cannot begin with
-      // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
-      DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
-      HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
-      for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
-        if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
-          catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
-          --j;
+      // a move-exception instruction, as guaranteed by the verifier. However,
+      // trivially dead predecessors are ignored by the verifier and such code
+      // has not been removed at this stage. We therefore do not rely on that
+      // guarantee here and leave it to GraphChecker to enforce after the
+      // initial DCE has run (b/25492628).
+      HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
+      if (normal_block == nullptr) {
+        // Catch block is either empty or only contains a move-exception. It must
+        // therefore be dead and will be removed during initial DCE. Do nothing.
+        DCHECK(!catch_block->EndsWithControlFlowInstruction());
+      } else {
+        // Catch block was split. Re-link normal-flow edges to the new block.
+        for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
+          if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+            catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
+            --j;
+          }
         }
       }
     }
@@ -373,19 +369,27 @@
 }
 
 void HGraph::SimplifyCFG() {
-  // Simplify the CFG for future analysis, and code generation:
+  // Simplify the CFG for future analysis, and code generation:
   // (1): Split critical edges.
-  // (2): Simplify loops by having only one back edge, and one preheader.
+  // (2): Simplify loops by having only one preheader.
   // NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
   // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
   for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
     HBasicBlock* block = blocks_[block_id];
     if (block == nullptr) continue;
-    if (block->NumberOfNormalSuccessors() > 1) {
-      for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
+    if (block->GetSuccessors().size() > 1) {
+      // Only split normal-flow edges. We cannot split exceptional edges, as they
+      // are synthesized (they only approximate real control flow), and we do not
+      // need to anyway: any moves that would be inserted there are performed by
+      // the runtime.
+      for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
         HBasicBlock* successor = block->GetSuccessors()[j];
         DCHECK(!successor->IsCatchBlock());
-        if (successor->GetPredecessors().size() > 1) {
+        if (successor == exit_block_) {
+          // Throw->TryBoundary->Exit. Special case which we do not want to split
+          // because Goto->Exit is not allowed.
+          DCHECK(block->IsSingleTryBoundary());
+          DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow());
+        } else if (successor->GetPredecessors().size() > 1) {
           SplitCriticalEdge(block, successor);
           --j;
         }
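Schematically, splitting a critical edge inserts a fresh block on that edge only, so register-allocation moves get a home without disturbing the other predecessors:

    before:  B1 -> S   and   B2 -> S    (B1 has another successor, S has several predecessors)
    after:   B1 -> new_block -> S   and   B2 -> S

The Throw->TryBoundary->Exit case is exempted because the inserted block would have to end with a Goto into the exit block, which the graph does not allow.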
@@ -1163,7 +1167,7 @@
 }
 
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
@@ -1193,7 +1197,7 @@
 }
 
 HBasicBlock* HBasicBlock::CreateImmediateDominator() {
-  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
 
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
@@ -1209,6 +1213,34 @@
   return new_block;
 }
 
+HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
+  DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+  DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
+
+  HInstruction* first_insn = GetFirstInstruction();
+  HInstruction* split_before = nullptr;
+
+  if (first_insn != nullptr && first_insn->IsLoadException()) {
+    // Catch block starts with a LoadException. Split the block after
+    // the StoreLocal and ClearException which must come after the load.
+    DCHECK(first_insn->GetNext()->IsStoreLocal());
+    DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
+    split_before = first_insn->GetNext()->GetNext()->GetNext();
+  } else {
+    // Catch block does not load the exception. Split at the beginning
+    // to create an empty catch block.
+    split_before = first_insn;
+  }
+
+  if (split_before == nullptr) {
+    // Catch block has no instructions after the split point (must be dead).
+    // Do not split it; signal the error by returning nullptr instead.
+    return nullptr;
+  } else {
+    return SplitBefore(split_before);
+  }
+}
+
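To illustrate the two outcomes of SplitCatchBlockAfterMoveException(): a catch block whose instruction list is

    LoadException, StoreLocal, ClearException, Invoke..., Goto

is split before the Invoke, keeping the three exception-handling instructions in the catch block and returning the block that holds the rest; a catch block containing only those three instructions (or nothing at all) has no split point left, so nullptr is returned and the block is expected to fall to the initial DCE.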
 HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
   DCHECK(!cursor->IsControlFlow());
   DCHECK_NE(instructions_.last_instruction_, cursor);
@@ -1940,6 +1972,16 @@
   return !opt.GetDoesNotNeedDexCache();
 }
 
+void HInvokeStaticOrDirect::RemoveInputAt(size_t index) {
+  RemoveAsUserOfInput(index);
+  inputs_.erase(inputs_.begin() + index);
+  // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+  for (size_t i = index, e = InputCount(); i < e; ++i) {
+    DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
+    InputRecordAt(i).GetUseNode()->SetIndex(i);
+  }
+}
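
A self-contained illustration of the fixup invariant asserted above: erasing input `index` shifts the later inputs left by one, so each of their use-node indices must drop by one (UseNode here is a stand-in for ART's HUseListNode):

    #include <cassert>
    #include <vector>

    struct UseNode { size_t index; };

    int main() {
      std::vector<UseNode> inputs = {{0}, {1}, {2}, {3}};
      size_t index = 1;
      inputs.erase(inputs.begin() + index);
      for (size_t i = index; i < inputs.size(); ++i) {
        assert(inputs[i].index == i + 1u);  // Old position, off by one now.
        inputs[i].index = i;                // Restore the invariant.
      }
      return 0;
    }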
+
 void HInstruction::RemoveEnvironmentUsers() {
   for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
     HUseListNode<HEnvironment*>* user_node = use_it.Current();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f2c1cf..ab53e63 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -350,8 +350,6 @@
 
   HCurrentMethod* GetCurrentMethod();
 
-  HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
-
   const DexFile& GetDexFile() const {
     return dex_file_;
   }
@@ -837,6 +835,15 @@
   // blocks are consistent (for example ending with a control flow instruction).
   HBasicBlock* SplitAfter(HInstruction* cursor);
 
+  // Split the catch block into two blocks after the original move-exception
+  // bytecode instruction, or at the beginning if it is not present. Returns the
+  // newly created latter block, or nullptr if such a block could not be created
+  // (the catch block must be dead in that case). Note that this method just
+  // updates raw block information, like predecessors, successors, dominators,
+  // and the instruction list. It does not update the graph, reverse post order,
+  // or loop information, nor does it make sure the blocks are consistent (for
+  // example ending with a control flow instruction).
+  HBasicBlock* SplitCatchBlockAfterMoveException();
+
   // Merge `other` at the end of `this`. Successors and dominated blocks of
   // `other` are changed to be successors and dominated blocks of `this`. Note
   // that this method does not update the graph, reverse post order, loop
@@ -3399,11 +3406,12 @@
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
-                // There is one extra argument for the HCurrentMethod node, and
-                // potentially one other if the clinit check is explicit, and one other
-                // if the method is a string factory.
-                1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u)
-                   + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+                // There is potentially one extra argument for the HCurrentMethod node,
+                // one more if the clinit check is explicit, and one more if the
+                // method is a string factory.
+                (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+                    (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
+                    (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
                 return_type,
                 dex_pc,
                 method_index,
@@ -3411,12 +3419,25 @@
         invoke_type_(invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
-        dispatch_info_(dispatch_info) {}
+        dispatch_info_(dispatch_info) { }
 
   void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+    bool had_current_method_input = HasCurrentMethodInput();
+    bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+
+    // Using the current method is the default and once we find a better
+    // method load kind, we should not go back to using the current method.
+    DCHECK(had_current_method_input || !needs_current_method_input);
+
+    if (had_current_method_input && !needs_current_method_input) {
+      DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+      RemoveInputAt(GetCurrentMethodInputIndex());
+    }
     dispatch_info_ = dispatch_info;
   }
 
+  void RemoveInputAt(size_t index);
+
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // We access the method via the dex cache so we can't do an implicit null check.
     // TODO: for intrinsics we can generate implicit null checks.
@@ -3438,6 +3459,17 @@
   bool HasPcRelDexCache() const {
     return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
   }
+  bool HasCurrentMethodInput() const {
+    // This function can be called only after the invoke has been fully initialized by the builder.
+    if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
+      DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      return true;
+    } else {
+      DCHECK(InputCount() == GetCurrentMethodInputIndex() ||
+             !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod());
+      return false;
+    }
+  }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
 
@@ -3486,8 +3518,8 @@
 
   bool IsStringFactoryFor(HFakeString* str) const {
     if (!IsStringInit()) return false;
-    // +1 for the current method.
-    if (InputCount() == (number_of_arguments_ + 1)) return false;
+    DCHECK(!HasCurrentMethodInput());
+    if (InputCount() == number_of_arguments_) return false;
     return InputAt(InputCount() - 1)->AsFakeString() == str;
   }
 
@@ -3513,6 +3545,11 @@
     return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
   }
 
+  // Does this method load kind need the current method as an input?
+  static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
+    return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
+  }
+
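A hedged model of the new extra-input count from the constructor above; the helper and example below are illustrative only:

    // One extra input per condition, matching the constructor's arithmetic.
    unsigned ExtraInputs(bool needs_current_method, bool explicit_clinit, bool string_init) {
      return (needs_current_method ? 1u : 0u) +
             (explicit_clinit ? 1u : 0u) +
             (string_init ? 1u : 0u);
    }
    // E.g. a kStringInit invoke: ExtraInputs(false, false, true) == 1, so
    // InputCount() == number_of_arguments_ + 1 and HasCurrentMethodInput() is
    // false, matching the DCHECK in IsStringFactoryFor() above.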
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  protected:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8cb2cfc..7e3c5e6 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
 #include "inliner.h"
 #include "instruction_simplifier.h"
 #include "intrinsics.h"
+#include "jit/jit_code_cache.h"
 #include "licm.h"
 #include "jni/quick/jni_compiler.h"
 #include "load_store_elimination.h"
@@ -258,15 +259,6 @@
                           const DexFile& dex_file,
                           Handle<mirror::DexCache> dex_cache) const OVERRIDE;
 
-  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
-                             uint32_t access_flags,
-                             InvokeType invoke_type,
-                             uint16_t class_def_idx,
-                             uint32_t method_idx,
-                             jobject class_loader,
-                             const DexFile& dex_file,
-                             Handle<mirror::DexCache> dex_cache) const;
-
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE {
@@ -291,23 +283,45 @@
     }
   }
 
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+      OVERRIDE
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   // Whether we should run any optimization or register allocation. If false, will
   // just run the code generation after the graph was built.
   const bool run_optimizations_;
 
-  // Optimize and compile `graph`.
-  CompiledMethod* CompileOptimized(HGraph* graph,
-                                   CodeGenerator* codegen,
-                                   CompilerDriver* driver,
-                                   const DexCompilationUnit& dex_compilation_unit,
-                                   PassObserver* pass_observer) const;
+  // Create a 'CompiledMethod' for an optimized graph.
+  CompiledMethod* EmitOptimized(ArenaAllocator* arena,
+                                CodeVectorAllocator* code_allocator,
+                                CodeGenerator* codegen,
+                                CompilerDriver* driver) const;
 
-  // Just compile without doing optimizations.
-  CompiledMethod* CompileBaseline(CodeGenerator* codegen,
-                                  CompilerDriver* driver,
-                                  const DexCompilationUnit& dex_compilation_unit,
-                                  PassObserver* pass_observer) const;
+  // Create a 'CompiledMethod' for a non-optimized graph.
+  CompiledMethod* EmitBaseline(ArenaAllocator* arena,
+                               CodeVectorAllocator* code_allocator,
+                               CodeGenerator* codegen,
+                               CompilerDriver* driver) const;
+
+  // Try compiling a method and return the code generator used for
+  // compiling it.
+  // This method:
+  // 1) Builds the graph. Returns null if the graph could not be built.
+  // 2) If `run_optimizations_` is set:
+  //    2.1) Transforms the graph to SSA. Returns null if that fails.
+  //    2.2) Runs optimizations on the graph, including the register allocator.
+  // 3) Generates code with the provided `code_allocator`.
+  CodeGenerator* TryCompile(ArenaAllocator* arena,
+                            CodeVectorAllocator* code_allocator,
+                            const DexFile::CodeItem* code_item,
+                            uint32_t access_flags,
+                            InvokeType invoke_type,
+                            uint16_t class_def_idx,
+                            uint32_t method_idx,
+                            jobject class_loader,
+                            const DexFile& dex_file,
+                            Handle<mirror::DexCache> dex_cache) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -446,13 +460,32 @@
   }
 }
 
+NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
+static void AllocateRegisters(HGraph* graph,
+                              CodeGenerator* codegen,
+                              PassObserver* pass_observer) {
+  PrepareForRegisterAllocation(graph).Run();
+  SsaLivenessAnalysis liveness(graph, codegen);
+  {
+    PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
+    liveness.Analyze();
+  }
+  {
+    PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
+    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+  }
+}
+
 static void RunOptimizations(HGraph* graph,
                              CodeGenerator* codegen,
                              CompilerDriver* driver,
                              OptimizingCompilerStats* stats,
                              const DexCompilationUnit& dex_compilation_unit,
-                             PassObserver* pass_observer,
-                             StackHandleScopeCollection* handles) {
+                             PassObserver* pass_observer) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScopeCollection handles(soa.Self());
+  ScopedThreadSuspension sts(soa.Self(), kNative);
+
   ArenaAllocator* arena = graph->GetArena();
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -469,7 +502,7 @@
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
   ReferenceTypePropagation* type_propagation =
-      new (arena) ReferenceTypePropagation(graph, handles);
+      new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_after_types");
@@ -492,7 +525,7 @@
 
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
-  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+  MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
 
   // TODO: Update passes incompatible with try/catch so we have the same
   //       pipeline for all methods.
@@ -532,6 +565,7 @@
   }
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
+  AllocateRegisters(graph, codegen, pass_observer);
 }
 
 // The stack map we generate must be 4-byte aligned on ARM. Since existing
@@ -545,22 +579,6 @@
   return ArrayRef<const uint8_t>(vector);
 }
 
-NO_INLINE  // Avoid increasing caller's frame size by large stack-allocated objects.
-static void AllocateRegisters(HGraph* graph,
-                              CodeGenerator* codegen,
-                              PassObserver* pass_observer) {
-  PrepareForRegisterAllocation(graph).Run();
-  SsaLivenessAnalysis liveness(graph, codegen);
-  {
-    PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
-    liveness.Analyze();
-  }
-  {
-    PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
-  }
-}
-
 static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
   ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
   codegen->EmitLinkerPatches(&linker_patches);
@@ -574,74 +592,42 @@
   return linker_patches;
 }
 
-CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
-                                                     CodeGenerator* codegen,
-                                                     CompilerDriver* compiler_driver,
-                                                     const DexCompilationUnit& dex_compilation_unit,
-                                                     PassObserver* pass_observer) const {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
-  soa.Self()->TransitionFromRunnableToSuspended(kNative);
-  RunOptimizations(graph,
-                   codegen,
-                   compiler_driver,
-                   compilation_stats_.get(),
-                   dex_compilation_unit,
-                   pass_observer,
-                   &handles);
-
-  AllocateRegisters(graph, codegen, pass_observer);
-
-  ArenaAllocator* arena = graph->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileOptimized(&allocator);
-
+CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
+                                                  CodeVectorAllocator* code_allocator,
+                                                  CodeGenerator* codegen,
+                                                  CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
-  codegen->BuildStackMaps(&stack_map);
+  stack_map.resize(codegen->ComputeStackMapsSize());
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
   MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
+      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(src_mapping_table),
+      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
       ArrayRef<const uint8_t>(),  // mapping_table.
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(),  // native_gc_map.
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
 
-  soa.Self()->TransitionFromSuspendedToRunnable();
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::CompileBaseline(
+CompiledMethod* OptimizingCompiler::EmitBaseline(
+    ArenaAllocator* arena,
+    CodeVectorAllocator* code_allocator,
     CodeGenerator* codegen,
-    CompilerDriver* compiler_driver,
-    const DexCompilationUnit& dex_compilation_unit,
-    PassObserver* pass_observer) const {
-  ArenaAllocator* arena = codegen->GetGraph()->GetArena();
-  CodeVectorAllocator allocator(arena);
-  DefaultSrcMap src_mapping_table;
-  codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
-                         ? &src_mapping_table
-                         : nullptr);
-  codegen->CompileBaseline(&allocator);
-
+    CompilerDriver* compiler_driver) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
 
   ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
@@ -649,37 +635,38 @@
   ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildVMapTable(&vmap_table);
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
-  codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
+  codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
   MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
-      ArrayRef<const uint8_t>(allocator.GetMemory()),
+      ArrayRef<const uint8_t>(code_allocator->GetMemory()),
       // Follow Quick's behavior and set the frame size to zero if it is
       // considered "empty" (see the definition of
       // art::CodeGenerator::HasEmptyFrame).
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(src_mapping_table),
+      ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
       AlignVectorSize(mapping_table),
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
-  pass_observer->DumpDisassembly();
   return compiled_method;
 }
 
-CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
-                                               uint32_t access_flags,
-                                               InvokeType invoke_type,
-                                               uint16_t class_def_idx,
-                                               uint32_t method_idx,
-                                               jobject class_loader,
-                                               const DexFile& dex_file,
-                                               Handle<mirror::DexCache> dex_cache) const {
+CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
+                                              CodeVectorAllocator* code_allocator,
+                                              const DexFile::CodeItem* code_item,
+                                              uint32_t access_flags,
+                                              InvokeType invoke_type,
+                                              uint16_t class_def_idx,
+                                              uint32_t method_idx,
+                                              jobject class_loader,
+                                              const DexFile& dex_file,
+                                              Handle<mirror::DexCache> dex_cache) const {
   std::string method_name = PrettyMethod(method_idx, dex_file);
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
@@ -721,13 +708,10 @@
       && compiler_driver->RequiresConstructorBarrier(Thread::Current(),
                                                      dex_compilation_unit.GetDexFile(),
                                                      dex_compilation_unit.GetClassDefIndex());
-  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
-  HGraph* graph = new (&arena) HGraph(
-      &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
+  HGraph* graph = new (arena) HGraph(
+      arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
       kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
 
-  bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
-
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
                             instruction_set,
@@ -779,16 +763,8 @@
     }
   }
 
-  bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
-
-  // `run_optimizations_` is set explicitly (either through a compiler filter
-  // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
-  // to Quick.
-  bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
-  CompiledMethod* compiled_method = nullptr;
-  if (run_optimizations_ && can_allocate_registers) {
-    VLOG(compiler) << "Optimizing " << method_name;
-
+  VLOG(compiler) << "Optimizing " << method_name;
+  if (run_optimizations_) {
     {
       PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
       if (!graph->TryBuildingSsa()) {
@@ -800,37 +776,26 @@
       }
     }
 
-    compiled_method = CompileOptimized(graph,
-                                       codegen.get(),
-                                       compiler_driver,
-                                       dex_compilation_unit,
-                                       &pass_observer);
-  } else if (shouldOptimize && can_allocate_registers) {
-    LOG(FATAL) << "Could not allocate registers in optimizing compiler";
-    UNREACHABLE();
-  } else if (can_use_baseline) {
-    VLOG(compiler) << "Compile baseline " << method_name;
-
-    if (!run_optimizations_) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
-    } else if (!can_allocate_registers) {
-      MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
-    }
-
-    compiled_method = CompileBaseline(codegen.get(),
-                                      compiler_driver,
-                                      dex_compilation_unit,
-                                      &pass_observer);
+    RunOptimizations(graph,
+                     codegen.get(),
+                     compiler_driver,
+                     compilation_stats_.get(),
+                     dex_compilation_unit,
+                     &pass_observer);
+    codegen->CompileOptimized(code_allocator);
+  } else {
+    codegen->CompileBaseline(code_allocator);
   }
+  pass_observer.DumpDisassembly();
 
   if (kArenaAllocatorCountAllocations) {
-    if (arena.BytesAllocated() > 4 * MB) {
-      MemStats mem_stats(arena.GetMemStats());
+    if (arena->BytesAllocated() > 4 * MB) {
+      MemStats mem_stats(arena->GetMemStats());
       LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
     }
   }
 
-  return compiled_method;
+  return codegen.release();
 }
 
 static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
@@ -852,26 +817,37 @@
                                             Handle<mirror::DexCache> dex_cache) const {
   CompilerDriver* compiler_driver = GetCompilerDriver();
   CompiledMethod* method = nullptr;
-  if (Runtime::Current()->IsAotCompiler()) {
-    const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
-    DCHECK(!verified_method->HasRuntimeThrow());
-    if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
-        || CanHandleVerificationFailure(verified_method)) {
-       method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                           method_idx, jclass_loader, dex_file, dex_cache);
-    } else {
-      if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+  DCHECK(!verified_method->HasRuntimeThrow());
+  if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
+      || CanHandleVerificationFailure(verified_method)) {
+    ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+    CodeVectorAllocator code_allocator(&arena);
+    std::unique_ptr<CodeGenerator> codegen(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   dex_file,
+                   dex_cache));
+    if (codegen.get() != nullptr) {
+      if (run_optimizations_) {
+        method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
-        MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+        method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
       }
     }
   } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                        method_idx, jclass_loader, dex_file, dex_cache);
+    if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+    } else {
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+    }
   }
 
   if (kIsDebugBuild &&
@@ -896,4 +872,70 @@
   return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
 }
 
+bool OptimizingCompiler::JitCompile(Thread* self,
+                                    jit::JitCodeCache* code_cache,
+                                    ArtMethod* method) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      method->GetDeclaringClass()->GetClassLoader()));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+
+  jobject jclass_loader = class_loader.ToJObject();
+  const DexFile* dex_file = method->GetDexFile();
+  const uint16_t class_def_idx = method->GetClassDefIndex();
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+  const uint32_t method_idx = method->GetDexMethodIndex();
+  const uint32_t access_flags = method->GetAccessFlags();
+  const InvokeType invoke_type = method->GetInvokeType();
+
+  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+  CodeVectorAllocator code_allocator(&arena);
+  std::unique_ptr<CodeGenerator> codegen;
+  {
+    // Go to native so that we don't block GC during compilation.
+    ScopedThreadSuspension sts(self, kNative);
+
+    DCHECK(run_optimizations_);
+    codegen.reset(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   *dex_file,
+                   dex_cache));
+    if (codegen.get() == nullptr) {
+      return false;
+    }
+  }
+
+  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size);
+  if (stack_map_data == nullptr) {
+    return false;
+  }
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  const void* code = code_cache->CommitCode(
+      self,
+      method,
+      nullptr,
+      stack_map_data,
+      nullptr,
+      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+      codegen->GetCoreSpillMask(),
+      codegen->GetFpuSpillMask(),
+      code_allocator.GetMemory().data(),
+      code_allocator.GetSize());
+
+  if (code == nullptr) {
+    code_cache->ClearData(self, stack_map_data);
+    return false;
+  }
+
+  return true;
+}
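
JitCompile commits through the code cache with an explicit rollback on failure. A self-contained sketch of that reserve/commit/rollback shape, with a hypothetical Cache type standing in for jit::JitCodeCache (whose real API takes many more arguments, as seen above):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    // Hypothetical cache, for illustration only; not the JitCodeCache API.
    struct Cache {
      uint8_t* ReserveData(size_t size) { return static_cast<uint8_t*>(malloc(size)); }
      const void* CommitCode(const uint8_t* maps, const uint8_t* code, size_t size) {
        (void)maps; (void)size; return code;  // Pretend the commit succeeded.
      }
      void ClearData(uint8_t* data) { free(data); }
    };

    bool EmitToCache(Cache* cache, const uint8_t* code, size_t code_size, size_t maps_size) {
      uint8_t* data = cache->ReserveData(maps_size);  // 1) Reserve metadata space.
      if (data == nullptr) return false;              //    Nothing to undo yet.
      // ... build the stack maps into `data` ...
      const void* entry = cache->CommitCode(data, code, code_size);  // 2) Commit.
      if (entry == nullptr) {
        cache->ClearData(data);                       // 3) Roll back the reservation.
        return false;
      }
      return true;
    }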
+
 }  // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995..ba2525e 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -117,14 +117,6 @@
   Emit(encoding);
 }
 
-void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
-void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
@@ -141,10 +133,6 @@
   EmitI(0x19, rs, rt, imm16);
 }
 
-void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
@@ -153,50 +141,14 @@
   EmitR(0, rs, rt, rd, 0, 0x2f);
 }
 
-void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
-}
-
-void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
-}
-
-void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0x1c, rs, rt, rd, 0, 2);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mfhi(rd);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mfhi(rd);
-}
-
 void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void Mips64Assembler::MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1a);
 }
@@ -217,6 +169,10 @@
   EmitR(0, rs, rt, rd, 2, 0x1c);
 }
 
+void Mips64Assembler::Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1c);
+}
+
 void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1e);
 }
@@ -440,14 +396,6 @@
            static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
 }
 
-void Mips64Assembler::Mfhi(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
-}
-
-void Mips64Assembler::Mflo(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
-}
-
 void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x28, rs, rt, imm16);
 }
@@ -892,45 +840,58 @@
   } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
     Lui(rd, value >> 16);
     Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
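+    // value + 1 is a power of two, so value is a run of trailing ones:
+    // materialize all ones with Daddiu(rd, ZERO, -1), then shift right.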
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      Dsrl(rd, rd, shift_cnt);
+    } else {
+      Dsrl32(rd, rd, shift_cnt & 31);
+    }
   } else {
     int shift_cnt = CTZ(value);
     int64_t tmp = value >> shift_cnt;
     if (IsUint<16>(tmp)) {
       Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<16>(tmp)) {
       Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<32>(tmp)) {
       // Loads with 3 instructions.
       Lui(rd, tmp >> 16);
       Ori(rd, rd, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else {
       shift_cnt = 16 + CTZ(value >> 16);
       tmp = value >> shift_cnt;
       if (IsUint<16>(tmp)) {
         Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else if (IsInt<16>(tmp)) {
         Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else {
         // Loads with 3-4 instructions.
@@ -941,10 +902,11 @@
           used_lui = true;
         }
         if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui)
+          if (used_lui) {
             Ori(rd, rd, tmp2);
-          else
+          } else {
             Ori(rd, ZERO, tmp2);
+          }
         }
         if (bit31) {
           tmp2 += UINT64_C(0x100000000);
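A worked check of the new run-of-trailing-ones case: for value == 2^n - 1, two instructions suffice (all ones, then a logical shift right by 64 - n). Self-contained verification of the arithmetic, with CTZ spelled as the GCC/Clang builtin:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t value = UINT64_C(0x000000FFFFFFFFFF);  // 40 trailing ones.
      assert(((value + 1) & value) == 0);             // value + 1 is a power of two.
      int n = __builtin_ctzll(value + 1);             // CTZ(value + 1) == 40.
      int shift_cnt = 64 - n;                         // == 24, as in the assembler.
      uint64_t rd = UINT64_C(0xFFFFFFFFFFFFFFFF);     // Daddiu(rd, ZERO, -1)
      rd >>= shift_cnt;                               // Dsrl/Dsrl32(rd, rd, shift_cnt)
      assert(rd == value);
      return 0;
    }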
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 33f22d2..42962bc 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -66,35 +66,25 @@
   virtual ~Mips64Assembler() {}
 
   // Emit Machine Instructions.
-  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
-  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
-  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -104,12 +94,12 @@
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Bitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Dbitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
-  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dsbh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dshd(GpuRegister rd, GpuRegister rt);  // R2+
+  void Bitswap(GpuRegister rd, GpuRegister rt);
+  void Dbitswap(GpuRegister rd, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);
+  void Seh(GpuRegister rd, GpuRegister rt);
+  void Dsbh(GpuRegister rd, GpuRegister rt);
+  void Dshd(GpuRegister rd, GpuRegister rt);
   void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -146,11 +136,9 @@
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
-  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
-  void Mfhi(GpuRegister rd);  // R2
-  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -175,21 +163,21 @@
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
-  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
-  void Jic(GpuRegister rt, uint16_t imm16);  // R6
-  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
-  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
+  void Auipc(GpuRegister rs, uint16_t imm16);
+  void Jic(GpuRegister rt, uint16_t imm16);
+  void Jialc(GpuRegister rt, uint16_t imm16);
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bltzc(GpuRegister rt, uint16_t imm16);
+  void Bgtzc(GpuRegister rt, uint16_t imm16);
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgezc(GpuRegister rt, uint16_t imm16);
+  void Blezc(GpuRegister rt, uint16_t imm16);
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqzc(GpuRegister rs, uint32_t imm21);
+  void Bnezc(GpuRegister rs, uint32_t imm21);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -259,25 +247,25 @@
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;  // R6
+  void Bind(Label* label) OVERRIDE;
   void Jump(Label* label) OVERRIDE {
     B(label);
   }
-  void B(Label* label);  // R6
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
+  void B(Label* label);
+  void Jalr(Label* label, GpuRegister indirect_reg = RA);
   // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bltzc(GpuRegister rt, Label* label);  // R6
-  void Bgtzc(GpuRegister rt, Label* label);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgezc(GpuRegister rt, Label* label);  // R6
-  void Blezc(GpuRegister rt, Label* label);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqzc(GpuRegister rs, Label* label);  // R6
-  void Bnezc(GpuRegister rs, Label* label);  // R6
+  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bltzc(GpuRegister rt, Label* label);
+  void Bgtzc(GpuRegister rt, Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgezc(GpuRegister rt, Label* label);
+  void Blezc(GpuRegister rt, Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqzc(GpuRegister rs, Label* label);
+  void Bnezc(GpuRegister rs, Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2653807..92ed58c 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1355,9 +1355,20 @@
       uint32_t image_file_location_oat_checksum = 0;
       uintptr_t image_file_location_oat_data_begin = 0;
       int32_t image_patch_delta = 0;
+
+      if (app_image_ && image_base_ == 0) {
+        gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
+        image_base_ = RoundUp(
+            reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatFileEnd()),
+            kPageSize);
+        VLOG(compiler) << "App image base=" << reinterpret_cast<void*>(image_base_);
+      }
+
       if (IsImage()) {
         PrepareImageWriter(image_base_);
-      } else {
+      }
+
+      if (!IsBootImage()) {
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
         gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
         image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
@@ -1371,11 +1382,13 @@
         key_value_store_->Put(OatHeader::kImageLocationKey, image_file_location);
       }
 
-      oat_writer.reset(new OatWriter(dex_files_, image_file_location_oat_checksum,
+      oat_writer.reset(new OatWriter(dex_files_,
+                                     image_file_location_oat_checksum,
                                      image_file_location_oat_data_begin,
                                      image_patch_delta,
                                      driver_.get(),
                                      image_writer_.get(),
+                                     IsBootImage(),
                                      timings_,
                                      key_value_store_.get()));
     }
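The app-image base defaults to the first page-aligned address after the boot oat file. A worked example of that RoundUp, assuming 4 KiB pages and a hypothetical oat end address:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uintptr_t kPageSize = 4096;
      uintptr_t oat_file_end = 0x71234567;  // Hypothetical GetOatFileEnd() address.
      uintptr_t image_base = (oat_file_end + kPageSize - 1) & ~(kPageSize - 1);  // RoundUp
      assert(image_base == 0x71235000);
      return 0;
    }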
@@ -1591,7 +1604,11 @@
   }
 
   void PrepareImageWriter(uintptr_t image_base) {
-    image_writer_.reset(new ImageWriter(*driver_, image_base, compiler_options_->GetCompilePic()));
+    DCHECK(IsImage());
+    image_writer_.reset(new ImageWriter(*driver_,
+                                        image_base,
+                                        compiler_options_->GetCompilePic(),
+                                        IsAppImage()));
   }
 
   // Let the ImageWriter write the image file. If we do not compile PIC, also fix up the oat file.
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index ea61b43..a163380 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -98,33 +98,6 @@
 
 class OatSymbolizer FINAL {
  public:
-  class RodataWriter FINAL : public CodeOutput {
-   public:
-    explicit RodataWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      return out->WriteFully(oat_file_->Begin(), rodata_size);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
-  class TextWriter FINAL : public CodeOutput {
-   public:
-    explicit TextWriter(const OatFile* oat_file) : oat_file_(oat_file) {}
-
-    bool Write(OutputStream* out) OVERRIDE {
-      const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-      const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
-      return out->WriteFully(text_begin, oat_file_->End() - text_begin);
-    }
-
-   private:
-    const OatFile* oat_file_;
-  };
-
   OatSymbolizer(const OatFile* oat_file, const std::string& output_name) :
       oat_file_(oat_file), builder_(nullptr),
       output_name_(output_name.empty() ? "symbolized.oat" : output_name) {
@@ -139,31 +112,57 @@
                                           uint32_t);
 
   bool Symbolize() {
-    Elf32_Word rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
-    uint32_t size = static_cast<uint32_t>(oat_file_->End() - oat_file_->Begin());
-    uint32_t text_size = size - rodata_size;
-    uint32_t bss_size = oat_file_->BssSize();
-    RodataWriter rodata_writer(oat_file_);
-    TextWriter text_writer(oat_file_);
-    builder_.reset(new ElfBuilder<ElfTypes32>(
-        oat_file_->GetOatHeader().GetInstructionSet(),
-        rodata_size, &rodata_writer,
-        text_size, &text_writer,
-        bss_size));
+    const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
+
+    File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
+    std::unique_ptr<BufferedOutputStream> output_stream(
+        new BufferedOutputStream(new FileOutputStream(elf_file)));
+    builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
+
+    builder_->Start();
+
+    auto* rodata = builder_->GetRoData();
+    auto* text = builder_->GetText();
+    auto* bss = builder_->GetBss();
+    auto* strtab = builder_->GetStrTab();
+    auto* symtab = builder_->GetSymTab();
+
+    rodata->Start();
+    const uint8_t* rodata_begin = oat_file_->Begin();
+    const size_t rodata_size = oat_file_->GetOatHeader().GetExecutableOffset();
+    rodata->WriteFully(rodata_begin, rodata_size);
+    rodata->End();
+
+    text->Start();
+    const uint8_t* text_begin = oat_file_->Begin() + rodata_size;
+    const size_t text_size = oat_file_->End() - text_begin;
+    text->WriteFully(text_begin, text_size);
+    text->End();
+
+    if (oat_file_->BssSize() != 0) {
+      bss->Start();
+      bss->SetSize(oat_file_->BssSize());
+      bss->End();
+    }
+
+    builder_->WriteDynamicSection(elf_file->GetPath());
 
     Walk(&art::OatSymbolizer::RegisterForDedup);
 
     NormalizeState();
 
+    strtab->Start();
+    strtab->Write("");  // An ELF string table must start with an empty string.
     Walk(&art::OatSymbolizer::AddSymbol);
+    strtab->End();
 
-    File* elf_output = OS::CreateEmptyFile(output_name_.c_str());
-    bool result = builder_->Write(elf_output);
+    symtab->Start();
+    symtab->Write();
+    symtab->End();
 
-    // Ignore I/O errors.
-    UNUSED(elf_output->FlushClose());
+    builder_->End();
 
-    return result;
+    return builder_->Good() && output_stream->Flush();
   }
 
   void Walk(Callback callback) {
@@ -295,9 +294,8 @@
         pretty_name = "[Dedup]" + pretty_name;
       }
 
-      auto* symtab = builder_->GetSymtab();
-
-      symtab->AddSymbol(pretty_name, builder_->GetText(),
+      int name_offset = builder_->GetStrTab()->Write(pretty_name);
+      builder_->GetSymTab()->Add(name_offset, builder_->GetText(),
           oat_method.GetCodeOffset() - oat_file_->GetOatHeader().GetExecutableOffset(),
           true, oat_method.GetQuickCodeSize(), STB_GLOBAL, STT_FUNC);
     }
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 0f784ed..1096af0 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -59,6 +59,8 @@
   SP   = 29,  // Stack pointer.
   FP   = 30,  // Saved value/frame pointer.
   RA   = 31,  // Return address.
+  TR   = S1,  // ART Thread Register.
+  TMP  = T8,  // Scratch register (in addition to AT).
   kNumberOfCoreRegisters = 32,
   kNoRegister = -1  // Signals an illegal register.
 };
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 2a8cf99..dbb546d 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -456,6 +456,16 @@
   return method_header;
 }
 
+bool ArtMethod::HasAnyCompiledCode() {
+  // Check whether the JIT has compiled it.
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr && jit->GetCodeCache()->ContainsMethod(this)) {
+    return true;
+  }
+
+  // Check whether we have AOT code.
+  return Runtime::Current()->GetClassLinker()->GetOatMethodQuickCodeFor(this) != nullptr;
+}
 
 void ArtMethod::CopyFrom(ArtMethod* src, size_t image_pointer_size) {
   memcpy(reinterpret_cast<void*>(this), reinterpret_cast<const void*>(src),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index ce9f202..201b3e6 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -454,6 +454,9 @@
   const OatQuickMethodHeader* GetOatQuickMethodHeader(uintptr_t pc)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns whether the method has any compiled code, JIT or AOT.
+  bool HasAnyCompiledCode() SHARED_REQUIRES(Locks::mutator_lock_);
+
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index da70456..5dac95d 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -104,12 +104,13 @@
   va_end(args);
 }
 
-bool ClassLinker::HasInitWithString(Thread* self, const char* descriptor) {
+static bool HasInitWithString(Thread* self, ClassLinker* class_linker, const char* descriptor)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
   ArtMethod* method = self->GetCurrentMethod(nullptr);
   StackHandleScope<1> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(method != nullptr ?
       method->GetDeclaringClass()->GetClassLoader() : nullptr));
-  mirror::Class* exception_class = FindClass(self, descriptor, class_loader);
+  mirror::Class* exception_class = class_linker->FindClass(self, descriptor, class_loader);
 
   if (exception_class == nullptr) {
     // No exc class ~ no <init>-with-string.
@@ -119,11 +120,40 @@
   }
 
   ArtMethod* exception_init_method = exception_class->FindDeclaredDirectMethod(
-      "<init>", "(Ljava/lang/String;)V", image_pointer_size_);
+      "<init>", "(Ljava/lang/String;)V", class_linker->GetImagePointerSize());
   return exception_init_method != nullptr;
 }
 
-void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c) {
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, mirror::Class* c)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  mirror::Object* obj = c->GetVerifyError();
+  DCHECK(obj != nullptr);
+  self->AssertNoPendingException();
+  if (obj->IsClass()) {
+    // Previous error has been stored as class. Create a new exception of that type.
+
+    // It's possible the exception doesn't have a <init>(String).
+    std::string temp;
+    const char* descriptor = obj->AsClass()->GetDescriptor(&temp);
+
+    if (HasInitWithString(self, class_linker, descriptor)) {
+      self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
+    } else {
+      self->ThrowNewException(descriptor, nullptr);
+    }
+  } else {
+    // Previous error has been stored as an instance. Just rethrow.
+    mirror::Class* throwable_class =
+        self->DecodeJObject(WellKnownClasses::java_lang_Throwable)->AsClass();
+    mirror::Class* error_class = obj->GetClass();
+    CHECK(throwable_class->IsAssignableFrom(error_class));
+    self->SetException(obj->AsThrowable());
+  }
+  self->AssertPendingException();
+}
+
+void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -131,8 +161,11 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyErrorClass() != nullptr) {
-      extra = PrettyDescriptor(c->GetVerifyErrorClass());
+    if (c->GetVerifyError() != nullptr) {
+      mirror::Class* descr_from = c->GetVerifyError()->IsClass()
+                                      ? c->GetVerifyError()->AsClass()
+                                      : c->GetVerifyError()->GetClass();
+      extra = PrettyDescriptor(descr_from);
     }
     LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
   }
@@ -144,20 +177,16 @@
     mirror::Throwable* pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyErrorClass() != nullptr) {
-      // TODO: change the verifier to store an _instance_, with a useful detail message?
-      // It's possible the exception doesn't have a <init>(String).
-      std::string temp;
-      const char* descriptor = c->GetVerifyErrorClass()->GetDescriptor(&temp);
-
-      if (HasInitWithString(self, descriptor)) {
-        self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
-      } else {
-        self->ThrowNewException(descriptor, nullptr);
-      }
-    } else {
-      self->ThrowNewException("Ljava/lang/NoClassDefFoundError;",
-                              PrettyDescriptor(c).c_str());
+    if (c->GetVerifyError() != nullptr) {
+      // Rethrow stored error.
+      HandleEarlierVerifyError(self, this, c);
+    }
+    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+      // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
+      // the top-level exception must be a NoClassDefFoundError. Any already-pending
+      // exception becomes its cause.
+      self->ThrowNewWrappedException("Ljava/lang/NoClassDefFoundError;",
+                                     PrettyDescriptor(c).c_str());
     }
   }
 }
@@ -847,8 +876,8 @@
       hs.NewHandle(dex_caches_object->AsObjectArray<mirror::DexCache>()));
 
   Handle<mirror::ObjectArray<mirror::Class>> class_roots(hs.NewHandle(
-          space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
-          AsObjectArray<mirror::Class>()));
+      space->GetImageHeader().GetImageRoot(ImageHeader::kClassRoots)->
+      AsObjectArray<mirror::Class>()));
   class_roots_ = GcRoot<mirror::ObjectArray<mirror::Class>>(class_roots.Get());
 
   // Special case of setting up the String class early so that we can test arbitrary objects
@@ -857,7 +886,7 @@
 
   mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
-  Runtime::Current()->SetSentinel(Runtime::Current()->GetHeap()->AllocObject<true>(self,
+  Runtime::Current()->SetSentinel(heap->AllocObject<true>(self,
                                                           java_lang_Object,
                                                           java_lang_Object->GetObjectSize(),
                                                           VoidFunctor()));
@@ -2126,8 +2155,6 @@
         last_field_idx = field_idx;
       }
     }
-    klass->SetSFieldsPtr(sfields);
-    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     // Load instance fields.
     LengthPrefixedArray<ArtField>* ifields = AllocArtFieldArray(self,
                                                                 allocator,
@@ -2149,8 +2176,17 @@
       LOG(WARNING) << "Duplicate fields in class " << PrettyDescriptor(klass.Get())
           << " (unique static fields: " << num_sfields << "/" << it.NumStaticFields()
           << ", unique instance fields: " << num_ifields << "/" << it.NumInstanceFields() << ")";
-      // NOTE: Not shrinking the over-allocated sfields/ifields.
+      // NOTE: Not shrinking the over-allocated sfields/ifields, just setting size.
+      if (sfields != nullptr) {
+        sfields->SetSize(num_sfields);
+      }
+      if (ifields != nullptr) {
+        ifields->SetSize(num_ifields);
+      }
     }
+    // Set the field arrays.
+    klass->SetSFieldsPtr(sfields);
+    DCHECK_EQ(klass->NumStaticFields(), num_sfields);
     klass->SetIFieldsPtr(ifields);
     DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
     // Load methods.
@@ -3399,7 +3435,7 @@
 
     // Was the class already found to be erroneous? Done under the lock to match the JLS.
     if (klass->IsErroneous()) {
-      ThrowEarlierClassFailure(klass.Get());
+      ThrowEarlierClassFailure(klass.Get(), true);
       VlogClassInitializationFailure(klass);
       return false;
     }
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 392efd2..a35ba3e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -848,15 +848,10 @@
 
   // Throw the class initialization failure recorded when first trying to initialize the given
   // class.
-  // Note: Currently we only store the descriptor, so we cannot throw the exact throwable, only
-  //       a recreation with a custom string.
-  void ThrowEarlierClassFailure(mirror::Class* c)
+  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  bool HasInitWithString(Thread* self, const char* descriptor)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_);
-
   bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 04b8900..2c086c5 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -515,7 +515,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_class_), "verifyErrorClass");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
     addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_), "virtualMethods");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e523fbb..13d0b84 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3284,9 +3284,9 @@
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
       // We don't need to deoptimize if the method has not been compiled.
-      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-      const bool is_compiled = class_linker->GetOatMethodQuickCodeFor(m) != nullptr;
+      const bool is_compiled = m->HasAnyCompiledCode();
       if (is_compiled) {
+        ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
         // If the method may be called through its direct code pointer (without loading
         // its updated entrypoint), we need full deoptimization to not miss the breakpoint.
         if (class_linker->MayBeCalledWithDirectCodePointer(m)) {
@@ -4069,13 +4069,15 @@
   if (is_constructor) {
     // If we invoked a constructor (which actually returns void), return the receiver,
     // unless we threw, in which case we return null.
-    result_tag = JDWP::JT_OBJECT;
+    DCHECK_EQ(JDWP::JT_VOID, result_tag);
     if (exceptionObjectId == 0) {
       // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the
       // object registry.
       result_value = GetObjectRegistry()->Add(pReq->receiver.Read());
+      result_tag = TagFromObject(soa, pReq->receiver.Read());
     } else {
       result_value = 0;
+      result_tag = JDWP::JT_OBJECT;
     }
   }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index e57569e..87e29ae 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -364,36 +364,34 @@
       (reinterpret_cast<uint8_t*>(sp) + callee_return_pc_offset));
   ArtMethod* outer_method = *caller_sp;
   ArtMethod* caller = outer_method;
-
-  if (outer_method != nullptr) {
-    const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
-    if (current_code->IsOptimized()) {
-      if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
-        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
-        StackMapEncoding encoding = code_info.ExtractEncoding();
-        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
-        DCHECK(stack_map.IsValid());
-        if (stack_map.HasInlineInfo(encoding)) {
-          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
-          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
-        }
-      } else {
-        // We're instrumenting, just use the StackVisitor which knows how to
-        // handle instrumented frames.
-        NthCallerVisitor visitor(Thread::Current(), 1, true);
-        visitor.WalkStack();
-        caller = visitor.caller;
+  if (LIKELY(caller_pc != reinterpret_cast<uintptr_t>(GetQuickInstrumentationExitPc()))) {
+    if (outer_method != nullptr) {
+      const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
+      if (current_code->IsOptimized()) {
+        uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+        CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+        StackMapEncoding encoding = code_info.ExtractEncoding();
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+        DCHECK(stack_map.IsValid());
+        if (stack_map.HasInlineInfo(encoding)) {
+          InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+          caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+        }
       }
     }
-  }
-
-  if (kIsDebugBuild && do_caller_check) {
-    // Note that do_caller_check is optional, as this method can be called by
-    // stubs, and tests without a proper call stack.
+    if (kIsDebugBuild && do_caller_check) {
+      // Note that do_caller_check is optional, as this method can be called by
+      // stubs, and tests without a proper call stack.
+      NthCallerVisitor visitor(Thread::Current(), 1, true);
+      visitor.WalkStack();
+      CHECK_EQ(caller, visitor.caller);
+    }
+  } else {
+    // We're instrumenting, just use the StackVisitor which knows how to
+    // handle instrumented frames.
     NthCallerVisitor visitor(Thread::Current(), 1, true);
     visitor.WalkStack();
-    CHECK_EQ(caller, visitor.caller);
+    caller = visitor.caller;
   }
 
   return caller;
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 5afd28e..f691151 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -26,7 +26,6 @@
 #include "jit_instrumentation.h"
 #include "runtime.h"
 #include "runtime_options.h"
-#include "thread_list.h"
 #include "utils.h"
 
 namespace art {
@@ -145,7 +144,7 @@
 
 void Jit::DeleteThreadPool() {
   if (instrumentation_cache_.get() != nullptr) {
-    instrumentation_cache_->DeleteThreadPool();
+    instrumentation_cache_->DeleteThreadPool(Thread::Current());
   }
 }
 
@@ -164,16 +163,8 @@
 
 void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
   CHECK_GT(compile_threshold, 0U);
-  ScopedSuspendAll ssa(__FUNCTION__);
-  // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
-  // something.
   instrumentation_cache_.reset(
       new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
-  Runtime::Current()->GetInstrumentation()->AddListener(
-      new jit::JitInstrumentationListener(instrumentation_cache_.get()),
-      instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch |
-      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
 }
 
 }  // namespace jit
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 4c7cb1e..a291a09 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -117,6 +117,16 @@
   return code_map_->Begin() <= ptr && ptr < code_map_->End();
 }
 
+bool JitCodeCache::ContainsMethod(ArtMethod* method) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto& it : method_code_map_) {
+    if (it.second == method) {
+      return true;
+    }
+  }
+  return false;
+}
+
 class ScopedCodeCacheWrite {
  public:
   explicit ScopedCodeCacheWrite(MemMap* code_map) : code_map_(code_map) {
@@ -248,45 +258,64 @@
 
   OatQuickMethodHeader* method_header = nullptr;
   uint8_t* code_ptr = nullptr;
-
-  ScopedThreadSuspension sts(self, kSuspended);
-  MutexLock mu(self, lock_);
-  WaitForPotentialCollectionToComplete(self);
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    uint8_t* result = reinterpret_cast<uint8_t*>(
-        mspace_memalign(code_mspace_, alignment, total_size));
-    if (result == nullptr) {
-      return nullptr;
-    }
-    code_ptr = result + header_size;
-    DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    WaitForPotentialCollectionToComplete(self);
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      uint8_t* result = reinterpret_cast<uint8_t*>(
+          mspace_memalign(code_mspace_, alignment, total_size));
+      if (result == nullptr) {
+        return nullptr;
+      }
+      code_ptr = result + header_size;
+      DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(code_ptr), alignment);
 
-    std::copy(code, code + code_size, code_ptr);
-    method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
-    new (method_header) OatQuickMethodHeader(
-        (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
-        (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
-        (gc_map == nullptr) ? 0 : code_ptr - gc_map,
-        frame_size_in_bytes,
-        core_spill_mask,
-        fp_spill_mask,
-        code_size);
+      std::copy(code, code + code_size, code_ptr);
+      method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+      new (method_header) OatQuickMethodHeader(
+          (mapping_table == nullptr) ? 0 : code_ptr - mapping_table,
+          (vmap_table == nullptr) ? 0 : code_ptr - vmap_table,
+          (gc_map == nullptr) ? 0 : code_ptr - gc_map,
+          frame_size_in_bytes,
+          core_spill_mask,
+          fp_spill_mask,
+          code_size);
+    }
+
+    __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
+                            reinterpret_cast<char*>(code_ptr + code_size));
+  }
+  // We need to update the entry point in the runnable state for the instrumentation.
+  {
+    MutexLock mu(self, lock_);
+    method_code_map_.Put(code_ptr, method);
+    Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+        method, method_header->GetEntryPoint());
+    if (collection_in_progress_) {
+      // We need to update the live bitmap if there is a GC to ensure it sees this new
+      // code.
+      GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
+    }
+    VLOG(jit)
+        << "JIT added "
+        << PrettyMethod(method) << "@" << method
+        << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
+        << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
   }
 
-  __builtin___clear_cache(reinterpret_cast<char*>(code_ptr),
-                          reinterpret_cast<char*>(code_ptr + code_size));
-  method_code_map_.Put(code_ptr, method);
-  // We have checked there was no collection in progress earlier. If we
-  // were, setting the entry point of a method would be unsafe, as the collection
-  // could delete it.
-  DCHECK(!collection_in_progress_);
-  method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
   return reinterpret_cast<uint8_t*>(method_header);
 }
 
 size_t JitCodeCache::CodeCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
+  return CodeCacheSizeLocked();
+}
+
+size_t JitCodeCache::CodeCacheSizeLocked() {
   size_t bytes_allocated = 0;
   mspace_inspect_all(code_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
   return bytes_allocated;
@@ -294,6 +323,10 @@
 
 size_t JitCodeCache::DataCacheSize() {
   MutexLock mu(Thread::Current(), lock_);
+  return DataCacheSizeLocked();
+}
+
+size_t JitCodeCache::DataCacheSizeLocked() {
   size_t bytes_allocated = 0;
   mspace_inspect_all(data_mspace_, DlmallocBytesAllocatedCallback, &bytes_allocated);
   return bytes_allocated;
@@ -304,6 +337,11 @@
   return method_code_map_.size();
 }
 
+void JitCodeCache::ClearData(Thread* self, void* data) {
+  MutexLock mu(self, lock_);
+  mspace_free(data_mspace_, data);
+}
+
 uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
   size = RoundUp(size, sizeof(void*));
   uint8_t* result = nullptr;
@@ -402,20 +440,24 @@
               << ", data=" << PrettySize(DataCacheSize());
   }
 
-  size_t map_size = 0;
-  ScopedThreadSuspension sts(self, kSuspended);
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
 
+  // Wait for an existing collection, or let everyone know we are starting one.
+  {
+    ScopedThreadSuspension sts(self, kSuspended);
+    MutexLock mu(self, lock_);
+    if (WaitForPotentialCollectionToComplete(self)) {
+      return;
+    } else {
+      collection_in_progress_ = true;
+    }
+  }
   // Walk over all compiled methods and set the entry points of these
   // methods to interpreter.
   {
     MutexLock mu(self, lock_);
-    if (WaitForPotentialCollectionToComplete(self)) {
-      return;
-    }
-    collection_in_progress_ = true;
-    map_size = method_code_map_.size();
     for (auto& it : method_code_map_) {
-      it.second->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+      instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
       info->GetMethod()->SetProfilingInfo(nullptr);
@@ -426,16 +468,12 @@
   {
     Barrier barrier(0);
     size_t threads_running_checkpoint = 0;
-    {
-      // Walking the stack requires the mutator lock.
-      // We only take the lock when running the checkpoint and not waiting so that
-      // when we go back to suspended, we can execute checkpoints that were requested
-      // concurrently, and then move to waiting for our own checkpoint to finish.
-      ScopedObjectAccess soa(self);
-      MarkCodeClosure closure(this, &barrier);
-      threads_running_checkpoint =
-          Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
-    }
+    MarkCodeClosure closure(this, &barrier);
+    threads_running_checkpoint =
+        Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+    // Now that we have run our checkpoint, move to a suspended state and wait
+    // for other threads to run the checkpoint.
+    ScopedThreadSuspension sts(self, kSuspended);
     if (threads_running_checkpoint != 0) {
       barrier.Increment(self, threads_running_checkpoint);
     }
@@ -443,7 +481,6 @@
 
   {
     MutexLock mu(self, lock_);
-    DCHECK_EQ(map_size, method_code_map_.size());
     // Free unused compiled code, and restore the entry point of used compiled code.
     {
       ScopedCodeCacheWrite scc(code_map_.get());
@@ -453,7 +490,7 @@
         uintptr_t allocation = FromCodeToAllocation(code_ptr);
         const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
         if (GetLiveBitmap()->Test(allocation)) {
-          method->SetEntryPointFromQuickCompiledCode(method_header->GetEntryPoint());
+          instrumentation->UpdateMethodsCode(method, method_header->GetEntryPoint());
           ++it;
         } else {
           method->ClearCounter();
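
CodeCacheSizeLocked() and DataCacheSizeLocked() follow the common *Locked() convention: the public method acquires the lock exactly once, and contexts that already hold it (such as the VLOG above, which runs under lock_) call the private variant directly, avoiding recursive locking. A generic sketch of the pattern using std::mutex rather than ART's Mutex:

    #include <cstddef>
    #include <mutex>

    class SizedCache {
     public:
      size_t Size() {  // public entry point: takes the lock
        std::lock_guard<std::mutex> guard(lock_);
        return SizeLocked();
      }

     private:
      size_t SizeLocked() {       // precondition: caller holds lock_
        return bytes_allocated_;  // guarded state is safe to read here
      }

      std::mutex lock_;
      size_t bytes_allocated_ = 0;
    };
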
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index e10f962..131446c 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -83,11 +83,19 @@
   // Return true if the code cache contains this pc.
   bool ContainsPc(const void* pc) const;
 
+  // Return true if the code cache contains this method.
+  bool ContainsMethod(ArtMethod* method) REQUIRES(!lock_);
+
   // Reserve a region of data of size at least "size". Returns null if there is no more room.
   uint8_t* ReserveData(Thread* self, size_t size)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
+  // Clear data from the data portion of the code cache.
+  void ClearData(Thread* self, void* data)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
@@ -158,6 +166,12 @@
   // Free in the mspace allocations taken by 'method'.
   void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
 
+  // Number of bytes allocated in the code cache.
+  size_t CodeCacheSizeLocked() REQUIRES(lock_);
+
+  // Number of bytes allocated in the data cache.
+  size_t DataCacheSizeLocked() REQUIRES(lock_);
+
   // Lock for guarding allocations, collections, and the method_code_map_.
   Mutex lock_;
   // Condition to wait on during collection.
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 7931306..6531325 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -20,6 +20,7 @@
 #include "jit.h"
 #include "jit_code_cache.h"
 #include "scoped_thread_state_change.h"
+#include "thread_list.h"
 
 namespace art {
 namespace jit {
@@ -73,16 +74,48 @@
 JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
                                                  size_t warm_method_threshold)
     : hot_method_threshold_(hot_method_threshold),
-      warm_method_threshold_(warm_method_threshold) {
+      warm_method_threshold_(warm_method_threshold),
+      listener_(this) {
 }
 
 void JitInstrumentationCache::CreateThreadPool() {
+  // Create the thread pool before setting the instrumentation, so that
+  // once threads are resumed, they can use it directly.
+  // There is a DCHECK in the 'AddSamples' method to ensure the thread
+  // pool is not null when we instrument.
   thread_pool_.reset(new ThreadPool("Jit thread pool", 1));
+  thread_pool_->StartWorkers(Thread::Current());
+  {
+    // Add Jit interpreter instrumentation, tells the interpreter when
+    // to notify the jit to compile something.
+    ScopedSuspendAll ssa(__FUNCTION__);
+    Runtime::Current()->GetInstrumentation()->AddListener(
+        &listener_, JitInstrumentationListener::kJitEvents);
+  }
 }
 
-void JitInstrumentationCache::DeleteThreadPool() {
-  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
-  thread_pool_.reset();
+void JitInstrumentationCache::DeleteThreadPool(Thread* self) {
+  DCHECK(Runtime::Current()->IsShuttingDown(self));
+  if (thread_pool_ != nullptr) {
+    // First remove the listener, to avoid having mutators enter
+    // 'AddSamples'.
+    ThreadPool* pool = nullptr;
+    {
+      ScopedSuspendAll ssa(__FUNCTION__);
+      Runtime::Current()->GetInstrumentation()->RemoveListener(
+          &listener_, JitInstrumentationListener::kJitEvents);
+      // Clear the thread_pool_ field while the threads are suspended.
+      // A mutator in the 'AddSamples' method will check against it.
+      pool = thread_pool_.release();
+    }
+    pool->StopWorkers(self);
+    pool->RemoveAllTasks(self);
+    // We could just suspend all threads, but we know those threads
+    // will finish in a short period, so it's not worth adding suspend logic
+    // here. Besides, this is only done at shutdown.
+    pool->Wait(self, false, false);
+    delete pool;
+  }
 }
 
 void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
@@ -91,25 +124,32 @@
   if (method->IsClassInitializer() || method->IsNative()) {
     return;
   }
-  if (thread_pool_.get() == nullptr) {
-    DCHECK(Runtime::Current()->IsShuttingDown(self));
-    return;
-  }
+  DCHECK(thread_pool_ != nullptr);
+
   uint16_t sample_count = method->IncrementCounter();
   if (sample_count == warm_method_threshold_) {
-    if (ProfilingInfo::Create(self, method, /* retry_allocation */ false)) {
+    bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
+    if (success) {
       VLOG(jit) << "Start profiling " << PrettyMethod(method);
-    } else {
+    }
+
+    if (thread_pool_ == nullptr) {
+      // Calling ProfilingInfo::Create might put us in a suspended state, which could
+      // lead to the thread pool being deleted when we are shutting down.
+      DCHECK(Runtime::Current()->IsShuttingDown(self));
+      return;
+    }
+
+    if (!success) {
       // We failed allocating. Instead of doing the collection on the Java thread, we push
       // an allocation to a compiler thread, that will do the collection.
       thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kAllocateProfile));
-      thread_pool_->StartWorkers(self);
     }
   }
 
   if (sample_count == hot_method_threshold_) {
+    DCHECK(thread_pool_ != nullptr);
     thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
-    thread_pool_->StartWorkers(self);
   }
 }
 
@@ -118,6 +158,20 @@
   CHECK(instrumentation_cache_ != nullptr);
 }
 
+void JitInstrumentationListener::MethodEntered(Thread* thread,
+                                               mirror::Object* /*this_object*/,
+                                               ArtMethod* method,
+                                               uint32_t /*dex_pc*/) {
+  instrumentation_cache_->AddSamples(thread, method, 1);
+}
+
+void JitInstrumentationListener::BackwardBranch(Thread* thread,
+                                                ArtMethod* method,
+                                                int32_t dex_pc_offset) {
+  CHECK_LE(dex_pc_offset, 0);
+  instrumentation_cache_->AddSamples(thread, method, 1);
+}
+
 void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
                                                           mirror::Object* this_object,
                                                           ArtMethod* caller,
@@ -138,7 +192,9 @@
 }
 
 void JitInstrumentationCache::WaitForCompilationToFinish(Thread* self) {
-  thread_pool_->Wait(self, false, false);
+  if (thread_pool_ != nullptr) {
+    thread_pool_->Wait(self, false, false);
+  }
 }
 
 }  // namespace jit
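
The teardown order above is deliberate: the listener is removed and thread_pool_ cleared while every thread is suspended, so no mutator can race into AddSamples() against a half-deleted pool; the potentially blocking drain happens only after threads resume. A self-contained sketch of the ordering with stand-in types (Pool and SuspendAll are placeholders, not ART's classes):

    #include <memory>

    struct Pool {  // stand-in for ThreadPool
      void StopWorkers() {}
      void RemoveAllTasks() {}
      void Wait() {}
    };
    struct SuspendAll {};  // stand-in for ScopedSuspendAll

    std::unique_ptr<Pool> pool_(new Pool);

    void DeletePool() {
      Pool* doomed = nullptr;
      {
        SuspendAll ssa;            // no mutator can be inside AddSamples() now
        // ...the listener would be removed here, before pool_ is cleared...
        doomed = pool_.release();  // resumed mutators will observe nullptr
      }
      doomed->StopWorkers();       // drain outside the suspension: may block
      doomed->RemoveAllTasks();
      doomed->Wait();              // short: workers only finish current tasks
      delete doomed;
    }

    int main() { DeletePool(); }
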
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 9eb464b..1f96d59 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -31,7 +31,6 @@
 
 namespace art {
 namespace mirror {
-  class Class;
   class Object;
   class Throwable;
 }  // namespace mirror
@@ -42,24 +41,7 @@
 
 namespace jit {
 
-// Keeps track of which methods are hot.
-class JitInstrumentationCache {
- public:
-  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
-  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_);
-  void CreateThreadPool();
-  void DeleteThreadPool();
-  // Wait until there is no more pending compilation tasks.
-  void WaitForCompilationToFinish(Thread* self);
-
- private:
-  size_t hot_method_threshold_;
-  size_t warm_method_threshold_;
-  std::unique_ptr<ThreadPool> thread_pool_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
-};
+class JitInstrumentationCache;
 
 class JitInstrumentationListener : public instrumentation::InstrumentationListener {
  public:
@@ -67,9 +49,8 @@
 
   void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
                      ArtMethod* method, uint32_t /*dex_pc*/)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
   void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
                     ArtMethod* /*method*/, uint32_t /*dex_pc*/,
                     const JValue& /*return_value*/)
@@ -90,10 +71,7 @@
                   ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
 
   void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
-      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    CHECK_LE(dex_pc_offset, 0);
-    instrumentation_cache_->AddSamples(thread, method, 1);
-  }
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
   void InvokeVirtualOrInterface(Thread* thread,
                                 mirror::Object* this_object,
@@ -102,12 +80,37 @@
                                 ArtMethod* callee)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
 
+  static constexpr uint32_t kJitEvents =
+      instrumentation::Instrumentation::kMethodEntered |
+      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kInvokeVirtualOrInterface;
+
  private:
   JitInstrumentationCache* const instrumentation_cache_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationListener);
 };
 
+// Keeps track of which methods are hot.
+class JitInstrumentationCache {
+ public:
+  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
+  void AddSamples(Thread* self, ArtMethod* method, size_t samples)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  void CreateThreadPool();
+  void DeleteThreadPool(Thread* self);
+  // Wait until there are no more pending compilation tasks.
+  void WaitForCompilationToFinish(Thread* self);
+
+ private:
+  size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
+  JitInstrumentationListener listener_;
+  std::unique_ptr<ThreadPool> thread_pool_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
+};
+
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 234a733..415109f 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1670,7 +1670,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -1684,7 +1684,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(java_string);
     ScopedObjectAccess soa(env);
     mirror::String* s = soa.Decode<mirror::String*>(java_string);
-    if (start < 0 || length < 0 || start + length > s->GetLength()) {
+    if (start < 0 || length < 0 || length > s->GetLength() - start) {
       ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2473,7 +2473,7 @@
                                                               "GetPrimitiveArrayRegion",
                                                               "get region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "src");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
@@ -2493,7 +2493,7 @@
                                                               "SetPrimitiveArrayRegion",
                                                               "set region of");
     if (array != nullptr) {
-      if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      if (start < 0 || length < 0 || length > array->GetLength() - start) {
         ThrowAIOOBE(soa, array, start, length, "dst");
       } else {
         CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
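
All four region checks move the arithmetic to the side that cannot overflow. With 32-bit signed values, start + length can exceed INT32_MAX and wrap negative, so the old comparison may falsely pass; len - start is safe once the first two clauses have established start >= 0 (and an array length is never negative). A standalone demonstration:

    #include <cstdint>
    #include <cstdio>

    // Old form: start + length may overflow (UB in standard C++, a negative
    // wraparound on common targets), letting huge regions slip through.
    bool RejectsOld(int32_t start, int32_t length, int32_t len) {
      return start < 0 || length < 0 || start + length > len;
    }

    // New form: len - start cannot overflow once start >= 0 and len >= 0; a
    // negative result makes any non-negative length fail, as required.
    bool RejectsNew(int32_t start, int32_t length, int32_t len) {
      return start < 0 || length < 0 || length > len - start;
    }

    int main() {
      const int32_t kMax = 0x7fffffff;
      printf("old rejects: %d\n", RejectsOld(kMax, kMax, 16));  // typically 0
      printf("new rejects: %d\n", RejectsNew(kMax, kMax, 16));  // 1
      return 0;
    }
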
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 41b368e..649df5f 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1077,6 +1077,12 @@
   env_->set_region_fn(a, size - 1, size, nullptr); \
   ExpectException(aioobe_); \
   \
+  /* Regression test against integer overflow in range check. */ \
+  env_->get_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  env_->set_region_fn(a, 0x7fffffff, 0x7fffffff, nullptr); \
+  ExpectException(aioobe_); \
+  \
   /* It's okay for the buffer to be null as long as the length is 0. */ \
   env_->get_region_fn(a, 2, 0, nullptr); \
   /* Even if the offset is invalid... */ \
@@ -1507,6 +1513,9 @@
   ExpectException(sioobe_);
   env_->GetStringRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   jchar chars[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringRegion(s, 1, 2, &chars[1]);
@@ -1529,6 +1538,9 @@
   ExpectException(sioobe_);
   env_->GetStringUTFRegion(s, 10, 1, nullptr);
   ExpectException(sioobe_);
+  // Regression test against integer overflow in range check.
+  env_->GetStringUTFRegion(s, 0x7fffffff, 0x7fffffff, nullptr);
+  ExpectException(sioobe_);
 
   char bytes[4] = { 'x', 'x', 'x', 'x' };
   env_->GetStringUTFRegion(s, 1, 2, &bytes[1]);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 19ee7f4..174de0e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -520,15 +520,6 @@
   }
 }
 
-inline void Class::SetVerifyErrorClass(Class* klass) {
-  CHECK(klass != nullptr) << PrettyClass(this);
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass);
-  }
-}
-
 template<VerifyObjectFlags kVerifyFlags>
 inline uint32_t Class::GetAccessFlags() {
   // Check class is loaded/retired or this is java.lang.String that has a
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 9d01a1d..91e1cec 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -57,6 +57,15 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+inline void Class::SetVerifyError(mirror::Object* error) {
+  CHECK(error != nullptr) << PrettyClass(this);
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+  }
+}
+
 void Class::SetStatus(Handle<Class> h_this, Status new_status, Thread* self) {
   Status old_status = h_this->GetStatus();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -85,36 +94,9 @@
       }
     }
 
-    // Stash current exception.
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
-    CHECK(old_exception.Get() != nullptr);
-    Class* eiie_class;
-    // Don't attempt to use FindClass if we have an OOM error since this can try to do more
-    // allocations and may cause infinite loops.
-    bool throw_eiie = (old_exception.Get() == nullptr);
-    if (!throw_eiie) {
-      std::string temp;
-      const char* old_exception_descriptor = old_exception->GetClass()->GetDescriptor(&temp);
-      throw_eiie = (strcmp(old_exception_descriptor, "Ljava/lang/OutOfMemoryError;") != 0);
-    }
-    if (throw_eiie) {
-      // Clear exception to call FindSystemClass.
-      self->ClearException();
-      eiie_class = Runtime::Current()->GetClassLinker()->FindSystemClass(
-          self, "Ljava/lang/ExceptionInInitializerError;");
-      CHECK(!self->IsExceptionPending());
-      // Only verification errors, not initialization problems, should set a verify error.
-      // This is to ensure that ThrowEarlierClassFailure will throw NoClassDefFoundError in that
-      // case.
-      Class* exception_class = old_exception->GetClass();
-      if (!eiie_class->IsAssignableFrom(exception_class)) {
-        h_this->SetVerifyErrorClass(exception_class);
-      }
-    }
-
-    // Restore exception.
-    self->SetException(old_exception.Get());
+    // Remember the current exception.
+    CHECK(self->GetException() != nullptr);
+    h_this->SetVerifyError(self->GetException());
   }
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
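
SetVerifyError() selects SetFieldObject<true> or <false> at runtime because the transaction flag keys undo-logging through a template parameter: the non-transactional path compiles with the logging statically removed. A generic sketch of that pattern (RecordWriteForRollback is a hypothetical stand-in for ART's transaction log):

    struct Object {};

    // Hypothetical undo log used to roll back an aborted dex2oat transaction.
    void RecordWriteForRollback(Object** slot, Object* old_value) {
      (void)slot;
      (void)old_value;
    }

    template <bool kTransactionActive>
    void SetField(Object** slot, Object* value) {
      if (kTransactionActive) {               // folded away when false
        RecordWriteForRollback(slot, *slot);  // remember the old value first
      }
      *slot = value;
    }

    int main() {
      Object a, b;
      Object* slot = &a;
      const bool active = false;  // queried once, like IsActiveTransaction()
      if (active) {
        SetField<true>(&slot, &b);
      } else {
        SetField<false>(&slot, &b);
      }
      return 0;
    }
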
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 8219d69..c4339b9 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1015,9 +1015,9 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) SHARED_REQUIRES(Locks::mutator_lock_);
 
-  Class* GetVerifyErrorClass() SHARED_REQUIRES(Locks::mutator_lock_) {
+  Object* GetVerifyError() SHARED_REQUIRES(Locks::mutator_lock_) {
     // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_));
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
   }
 
   uint16_t GetDexClassDefIndex() SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -1158,7 +1158,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
-  void SetVerifyErrorClass(Class* klass) SHARED_REQUIRES(Locks::mutator_lock_);
+  void SetVerifyError(Object* error) SHARED_REQUIRES(Locks::mutator_lock_);
 
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(Class* access_to, ArtField* field,
@@ -1230,8 +1230,9 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries.
-  HeapReference<Class> verify_error_class_;
+  // If class verification fails, we must return the same error on subsequent tries.
+  // We may store either the class of the error, or an actual instance of Throwable
+  // here.
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 81e7e6d..da21fee 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -27,7 +27,7 @@
 #include "base/time_utils.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -1034,15 +1034,15 @@
   for (uint32_t monitor_dex_pc : monitor_enter_dex_pcs) {
     // The verifier works in terms of the dex pcs of the monitor-enter instructions.
     // We want the registers used by those instructions (so we can read the values out of them).
-    uint16_t monitor_enter_instruction = code_item->insns_[monitor_dex_pc];
+    const Instruction* monitor_enter_instruction =
+        Instruction::At(&code_item->insns_[monitor_dex_pc]);
 
     // Quick sanity check.
-    if ((monitor_enter_instruction & 0xff) != Instruction::MONITOR_ENTER) {
-      LOG(FATAL) << "expected monitor-enter @" << monitor_dex_pc << "; was "
-                 << reinterpret_cast<void*>(monitor_enter_instruction);
-    }
+    CHECK_EQ(monitor_enter_instruction->Opcode(), Instruction::MONITOR_ENTER)
+      << "expected monitor-enter @" << monitor_dex_pc << "; was "
+      << reinterpret_cast<const void*>(monitor_enter_instruction);
 
-    uint16_t monitor_register = ((monitor_enter_instruction >> 8) & 0xff);
+    uint16_t monitor_register = monitor_enter_instruction->VRegA();
     uint32_t value;
     bool success = stack_visitor->GetVReg(m, monitor_register, kReferenceVReg, &value);
     CHECK(success) << "Failed to read v" << monitor_register << " of kind "
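
monitor-enter is a format 11x instruction: one 16-bit code unit with the opcode in the low byte and vAA in the high byte, which is why the old masking happened to work. Instruction::At()/VRegA() keep the decode correct for every format. The raw layout the old code relied on (the encoding facts are from the dex format; the snippet is standalone):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint16_t insn = 0x051d;         // monitor-enter v5 (opcode 0x1d)
      uint8_t opcode = insn & 0xff;         // low byte: opcode
      uint8_t vreg_a = (insn >> 8) & 0xff;  // high byte: vAA, format 11x only
      assert(opcode == 0x1d && vreg_a == 5);
      return 0;
    }
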
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 7b1fdb2..a8ba19b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -151,6 +151,11 @@
     return compiler_callbacks_;
   }
 
+  void SetCompilerCallbacks(CompilerCallbacks* callbacks) {
+    CHECK(callbacks != nullptr);
+    compiler_callbacks_ = callbacks;
+  }
+
   bool IsZygote() const {
     return is_zygote_;
   }
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index 0527d3a..5a4dfb8 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -82,6 +82,11 @@
   }
 }
 
+void ThreadPool::RemoveAllTasks(Thread* self) {
+  MutexLock mu(self, task_queue_lock_);
+  tasks_.clear();
+}
+
 ThreadPool::ThreadPool(const char* name, size_t num_threads)
   : name_(name),
     task_queue_lock_("task queue lock"),
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index a2338d6..6cd4ad3 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -91,6 +91,9 @@
   // after running it, it is the caller's responsibility.
   void AddTask(Thread* self, Task* task) REQUIRES(!task_queue_lock_);
 
+  // Remove all tasks in the queue.
+  void RemoveAllTasks(Thread* self) REQUIRES(!task_queue_lock_);
+
   ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 48dce63..68db7e3 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1434,7 +1434,8 @@
     execv(program, &args[0]);
 
     PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-    exit(1);
+    // _exit to avoid atexit handlers in child.
+    _exit(1);
   } else {
     if (pid == -1) {
       *error_msg = StringPrintf("Failed to execv(%s) because fork failed: %s",
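
In a forked child that failed to exec, exit() would run the atexit handlers and flush the stdio buffers the child inherited from the runtime, producing duplicate side effects; _exit() terminates immediately. The standard shape of the pattern:

    #include <sys/wait.h>
    #include <unistd.h>
    #include <cstdio>

    int main() {
      pid_t pid = fork();
      if (pid == 0) {  // child
        execl("/bin/true", "true", static_cast<char*>(nullptr));
        perror("execl");  // reached only if exec failed
        _exit(1);         // not exit(): skip inherited atexit handlers/flushes
      }
      int status = 0;
      waitpid(pid, &status, 0);  // parent reaps the child
      return 0;
    }
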
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 2db79ab..aae0317 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -2045,6 +2045,10 @@
           } else if (reg_type.IsUninitializedTypes()) {
             Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "returning uninitialized object '"
                                               << reg_type << "'";
+          } else if (!reg_type.IsReferenceTypes()) {
+            // We really do expect a reference here.
+            Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-object returns a non-reference type "
+                                              << reg_type;
           } else if (!return_type.IsAssignableFrom(reg_type)) {
             if (reg_type.IsUnresolvedTypes() || return_type.IsUnresolvedTypes()) {
               Fail(VERIFY_ERROR_NO_CLASS) << " can't resolve returned type '" << return_type
diff --git a/test/008-exceptions/expected.txt b/test/008-exceptions/expected.txt
index 92c79dc..083ecf7 100644
--- a/test/008-exceptions/expected.txt
+++ b/test/008-exceptions/expected.txt
@@ -1,12 +1,17 @@
 Got an NPE: second throw
 java.lang.NullPointerException: second throw
-	at Main.catchAndRethrow(Main.java:58)
-	at Main.exceptions_007(Main.java:41)
-	at Main.main(Main.java:49)
+	at Main.catchAndRethrow(Main.java:77)
+	at Main.exceptions_007(Main.java:59)
+	at Main.main(Main.java:67)
 Caused by: java.lang.NullPointerException: first throw
-	at Main.throwNullPointerException(Main.java:65)
-	at Main.catchAndRethrow(Main.java:55)
+	at Main.throwNullPointerException(Main.java:84)
+	at Main.catchAndRethrow(Main.java:74)
 	... 2 more
 Static Init
-BadError: This is bad by convention
-BadError: This is bad by convention
+BadError: This is bad by convention: BadInit
+java.lang.NoClassDefFoundError: BadInit
+BadError: This is bad by convention: BadInit
+Static BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
+java.lang.NoClassDefFoundError: BadInitNoStringInit
+BadErrorNoStringInit: This is bad by convention
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 7f6d0c5..9e3477a 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -14,20 +14,38 @@
  * limitations under the License.
  */
 
-// An exception that doesn't have a <init>(String) method.
+// An error class.
 class BadError extends Error {
-    public BadError() {
-        super("This is bad by convention");
+    public BadError(String s) {
+        super("This is bad by convention: " + s);
     }
 }
 
-// A class that throws BadException during static initialization.
+// A class that throws BadError during static initialization.
 class BadInit {
     static int dummy;
     static {
         System.out.println("Static Init");
         if (true) {
-            throw new BadError();
+            throw new BadError("BadInit");
+        }
+    }
+}
+
+// An error that doesn't have a <init>(String) method.
+class BadErrorNoStringInit extends Error {
+    public BadErrorNoStringInit() {
+        super("This is bad by convention");
+    }
+}
+
+// A class that throws BadErrorNoStringInit during static initialization.
+class BadInitNoStringInit {
+    static int dummy;
+    static {
+        System.out.println("Static BadInitNoStringInit");
+        if (true) {
+            throw new BadErrorNoStringInit();
         }
     }
 }
@@ -48,6 +66,7 @@
     public static void main (String args[]) {
         exceptions_007();
         exceptionsRethrowClassInitFailure();
+        exceptionsRethrowClassInitFailureNoStringInit();
     }
 
     private static void catchAndRethrow() {
@@ -79,8 +98,32 @@
             try {
                 BadInit.dummy = 1;
                 throw new IllegalStateException("Should not reach here.");
-            } catch (BadError e) {
+            } catch (NoClassDefFoundError e) {
                 System.out.println(e);
+                System.out.println(e.getCause());
+            }
+        } catch (Exception error) {
+            error.printStackTrace();
+        }
+    }
+
+    private static void exceptionsRethrowClassInitFailureNoStringInit() {
+        try {
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (BadErrorNoStringInit e) {
+                System.out.println(e);
+            }
+
+            // Check if it works a second time.
+
+            try {
+                BadInitNoStringInit.dummy = 1;
+                throw new IllegalStateException("Should not reach here.");
+            } catch (NoClassDefFoundError e) {
+                System.out.println(e);
+                System.out.println(e.getCause());
             }
         } catch (Exception error) {
             error.printStackTrace();
diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java
index 96302fb..6d6a4f2 100644
--- a/test/441-checker-inliner/src/Main.java
+++ b/test/441-checker-inliner/src/Main.java
@@ -19,7 +19,7 @@
   /// CHECK-START: void Main.InlineVoid() inliner (before)
   /// CHECK-DAG:     <<Const42:i\d+>> IntConstant 42
   /// CHECK-DAG:                      InvokeStaticOrDirect
-  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>,{{[ij]\d+}}]
+  /// CHECK-DAG:                      InvokeStaticOrDirect [<<Const42>>{{(,[ij]\d+)?}}]
 
   /// CHECK-START: void Main.InlineVoid() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
@@ -31,7 +31,7 @@
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (before)
   /// CHECK-DAG:     <<Param:i\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: int Main.InlineParameter(int) inliner (after)
@@ -44,7 +44,7 @@
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (before)
   /// CHECK-DAG:     <<Param:j\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: long Main.InlineWideParameter(long) inliner (after)
@@ -57,7 +57,7 @@
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (before)
   /// CHECK-DAG:     <<Param:l\d+>>  ParameterValue
-  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:                     Return [<<Result>>]
 
   /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (after)
@@ -128,8 +128,8 @@
   /// CHECK-DAG:     <<Const1:i\d+>> IntConstant 1
   /// CHECK-DAG:     <<Const3:i\d+>> IntConstant 3
   /// CHECK-DAG:     <<Const5:i\d+>> IntConstant 5
-  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>,{{[ij]\d+}}]
-  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>,{{[ij]\d+}}]
+  /// CHECK-DAG:     <<Add:i\d+>>    InvokeStaticOrDirect [<<Const1>>,<<Const3>>{{(,[ij]\d+)?}}]
+  /// CHECK-DAG:     <<Sub:i\d+>>    InvokeStaticOrDirect [<<Const5>>,<<Const3>>{{(,[ij]\d+)?}}]
   /// CHECK-DAG:     <<Phi:i\d+>>    Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                     Return [<<Phi>>]
 
diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java
index e6aab63..cff6273 100644
--- a/test/478-checker-clinit-check-pruning/src/Main.java
+++ b/test/478-checker-clinit-check-pruning/src/Main.java
@@ -67,14 +67,14 @@
    */
 
   /// CHECK-START: void Main.invokeStaticNotInlined() builder (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
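+  // Plain CHECK lines, unlike CHECK-DAG, require the matched instructions to
+  // appear in exactly this order.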
 
   /// CHECK-START: void Main.invokeStaticNotInlined() inliner (after)
-  /// CHECK-DAG:     <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
-  /// CHECK-DAG:     <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
-  /// CHECK-DAG:                           InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>]
+  /// CHECK:         <<LoadClass:l\d+>>    LoadClass gen_clinit_check:false
+  /// CHECK:         <<ClinitCheck:l\d+>>  ClinitCheck [<<LoadClass>>]
+  /// CHECK:                               InvokeStaticOrDirect [{{([ij]\d+,)?}}<<ClinitCheck>>]
 
   // The following checks ensure the clinit check and load class
   // instructions added by the builder are pruned by the
@@ -83,7 +83,7 @@
   // before the next pass (liveness analysis) instead.
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
-  /// CHECK-DAG:                           InvokeStaticOrDirect
+  /// CHECK:                               InvokeStaticOrDirect
 
   /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before)
   /// CHECK-NOT:                           LoadClass
diff --git a/test/537-checker-jump-over-jump/expected.txt b/test/537-checker-jump-over-jump/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/537-checker-jump-over-jump/expected.txt
diff --git a/test/537-checker-jump-over-jump/info.txt b/test/537-checker-jump-over-jump/info.txt
new file mode 100644
index 0000000..aeb30bb
--- /dev/null
+++ b/test/537-checker-jump-over-jump/info.txt
@@ -0,0 +1 @@
+Test for X86-64 elimination of jump over jump.
diff --git a/test/537-checker-jump-over-jump/src/Main.java b/test/537-checker-jump-over-jump/src/Main.java
new file mode 100644
index 0000000..fb666ea
--- /dev/null
+++ b/test/537-checker-jump-over-jump/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+public class Main {
+  public static int FIBCOUNT = 64;
+  public static int[] fibs;
+
+  /// CHECK-START-X86_64: int Main.test() disassembly (after)
+  /// CHECK:          If
+  /// CHECK-NEXT:     cmp
+  /// CHECK-NEXT:     jnl/ge
+  /// CHECK-NOT:      jmp
+  /// CHECK:          ArrayGet
+  // Checks that there is no conditional jump over a jmp. The ArrayGet is in
+  // the next block.
+  public static int test() {
+    for (int i = 1; ; i++) {
+      if (i >= FIBCOUNT) {
+        return fibs[0];
+      }
+      fibs[i] = (i + fibs[(i - 1)]);
+    }
+  }
+
+  public static void main(String[] args) {
+    fibs = new int[FIBCOUNT];
+    fibs[0] = 1;
+    test();
+  }
+}
diff --git a/test/546-regression-simplify-catch/expected.txt b/test/546-regression-simplify-catch/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/546-regression-simplify-catch/expected.txt
diff --git a/test/546-regression-simplify-catch/info.txt b/test/546-regression-simplify-catch/info.txt
new file mode 100644
index 0000000..b146e87
--- /dev/null
+++ b/test/546-regression-simplify-catch/info.txt
@@ -0,0 +1,2 @@
+Tests simplification of catch blocks in the presence of trivially dead code
+that was not verified by the verifier.
diff --git a/test/546-regression-simplify-catch/smali/TestCase.smali b/test/546-regression-simplify-catch/smali/TestCase.smali
new file mode 100644
index 0000000..486b3b0
--- /dev/null
+++ b/test/546-regression-simplify-catch/smali/TestCase.smali
@@ -0,0 +1,104 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# Test simplification of an empty, dead catch block. The compiler used to
+# segfault because it expected at least one control-flow instruction
+# (b/25494450).
+
+.method public static testCase_EmptyCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    nop
+
+.end method
+
+# Test simplification of a dead catch block with some code but no control-flow
+# instruction.
+
+.method public static testCase_NoControlFlowCatch()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    nop
+
+    :catch
+    const v1, 0x3
+    add-int v0, v0, v1
+
+.end method
+
+# Test simplification of a dead catch block that has normal predecessors but
+# starts with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_InvalidLoadException()I
+    .registers 3
+
+    const v0, 0x0
+    return v0
+
+    :try_start
+    nop
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    :catch
+    move-exception v0
+
+.end method
+
+# Test simplification of a live catch block that has dead normal predecessors
+# and starts with a move-exception. The verifier does not check trivially dead
+# code, and this used to trip a DCHECK (b/25492628).
+
+.method public static testCase_TriviallyDeadPredecessor(II)I
+    .registers 3
+
+    :try_start
+    div-int v0, p0, p1
+    return v0
+    :try_end
+    .catchall {:try_start .. :try_end} :catch
+
+    # Trivially dead predecessor block.
+    add-int p0, p0, p1
+
+    :catch
+    # This verifies because only exceptional predecessors are live.
+    move-exception v0
+    const v0, 0x0
+    return v0
+
+.end method
+
diff --git a/test/546-regression-simplify-catch/src/Main.java b/test/546-regression-simplify-catch/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/546-regression-simplify-catch/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/547-regression-trycatch-critical-edge/expected.txt b/test/547-regression-trycatch-critical-edge/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/expected.txt
diff --git a/test/547-regression-trycatch-critical-edge/info.txt b/test/547-regression-trycatch-critical-edge/info.txt
new file mode 100644
index 0000000..dc798c0
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/info.txt
@@ -0,0 +1,2 @@
+Test a specific SSA building regression where a back edge would not be split
+due to being on a try/catch boundary.
\ No newline at end of file
diff --git a/test/547-regression-trycatch-critical-edge/smali/TestCase.smali b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
new file mode 100644
index 0000000..53a3cc5
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/smali/TestCase.smali
@@ -0,0 +1,57 @@
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTestCase;
+.super Ljava/lang/Object;
+
+# The following test case would crash liveness analysis because the back edge of
+# the outer loop would have a smaller liveness position than the two back edges
+# of the inner loop. This was caused by a bug that failed to split the critical
+# edge between the TryBoundary and the outer loop header (b/25493695).
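+# Note that the inner loop has two back edges: one from the normal path and one
+# from the catch handler.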
+
+.method public static testCase(II)I
+  .registers 10
+
+  const v0, 0x0                                       # v0 = result
+  const v1, 0x1                                       # v1 = const 1
+
+  move v2, p0                                         # v2 = outer loop counter
+  :outer_loop
+  if-eqz v2, :return
+  sub-int/2addr v2, v1
+
+  :try_start
+
+  move v3, p1                                         # v3 = inner loop counter
+  :inner_loop
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  if-eqz v3, :outer_loop                              # back edge of outer loop
+  sub-int/2addr v3, v1
+
+  invoke-static {}, Ljava/lang/System;->nanoTime()J   # throwing instruction
+  add-int/2addr v0, v1
+  goto :inner_loop                                    # back edge of inner loop
+
+  :try_end
+  .catchall {:try_start .. :try_end} :catch
+
+  :catch
+  const v4, 0x2
+  add-int/2addr v0, v4
+  goto :inner_loop                                    # back edge of inner loop
+
+  :return
+  return v0
+
+.end method
diff --git a/test/547-regression-trycatch-critical-edge/src/Main.java b/test/547-regression-trycatch-critical-edge/src/Main.java
new file mode 100644
index 0000000..8eddac3
--- /dev/null
+++ b/test/547-regression-trycatch-critical-edge/src/Main.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) {}
+
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 17c1f00..a590cf1 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -46,4 +46,5 @@
 b/23300986 (2)
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
+b/25494456
 Done!
diff --git a/test/800-smali/smali/b_25494456.smali b/test/800-smali/smali/b_25494456.smali
new file mode 100644
index 0000000..0675b27
--- /dev/null
+++ b/test/800-smali/smali/b_25494456.smali
@@ -0,0 +1,14 @@
+.class public LB25494456;
+
+.super Ljava/lang/Object;
+
+# Ensure that a type mismatch (integral/float vs reference) overrides a soft failure (because of
+# an unresolvable type) in return-object.
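+# Here v0 holds an integral constant, which can never be a reference, so the
+# verifier must hard-fail even though the declared return type is unresolvable.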
+
+.method public static run()Lwont/be/Resolvable;
+    .registers 1
+
+    const/4 v0, 1
+    return-object v0
+
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index f75747d..4844848 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -137,6 +137,8 @@
                 new Object[] { new Object() }, null, null));
         testCases.add(new TestCase("b/23502994 (check-cast)", "B23502994", "runCHECKCAST",
                 new Object[] { "abc" }, null, null));
+        testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
+                null));
     }
 
     public void runTests() {
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 18867fd..3efa6ff 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -367,6 +367,7 @@
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
                       --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --app-image-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.art | cut -d/ -f 2- | sed "s:/:@:g") \
                       --instruction-set=$ISA"
   if [ "x$INSTRUCTION_SET_FEATURES" != "x" ] ; then
     dex2oat_cmdline="${dex2oat_cmdline} --instruction-set-features=${INSTRUCTION_SET_FEATURES}"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index d6f55aa..aa548cc 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -13,8 +13,6 @@
    - Recommend how to start looking at a heap dump.
    - Say how to enable allocation sites.
    - Where to submit feedback, questions, and bug reports.
- * Submit perflib fix for getting stack traces, then uncomment that code in
-   AhatSnapshot to use that.
  * Dim 'image' and 'zygote' heap sizes slightly? Why do we even show these?
  * Filter out RootObjs in mSnapshot.getGCRoots, not RootsHandler.
  * Let user re-sort sites objects info by clicking column headers.
@@ -25,23 +23,15 @@
  * Show root types.
  * Heaped Table
    - Make sortable by clicking on headers.
-   - Use consistent order for heap columns.
-      Sometimes I see "app" first, sometimes last (from one heap dump to
-      another) How about, always sort by name?
  * For HeapTable with single heap shown, the heap name isn't centered?
  * Consistently document functions.
  * Should help be part of an AhatHandler, that automatically gets the menu and
    stylesheet link rather than duplicating that?
  * Show version number with --version.
  * Show somewhere where to send bugs.
- * /objects query takes a long time to load without parameters.
  * Include a link to /objects in the overview and menu?
  * Turn on LOCAL_JAVACFLAGS := -Xlint:unchecked -Werror
  * Use hex for object ids in URLs?
- * In general, all tables and descriptions should show a limited amount to
-   start, and only show more when requested by the user.
- * Don't have handlers inherit from HttpHandler
-   - because they should be independent from http.
 
  * [low priority] by site allocations won't line up if the stack has been
    truncated. Is there any way to manually line them up in that case?
@@ -60,8 +50,6 @@
    objects normally sorted by 'app' heap by default.
  * Visit /objects without parameters and verify it doesn't throw an exception.
  * Visit /objects with an invalid site, verify it doesn't throw an exception.
- * That we can view an array with 3 million elements in a reasonably short
-   amount of time (not more than 1 second?)
  * That we can view the list of all objects in a reasonably short amount of
    time.
  * That we don't show the 'extra' column in the DominatedList if we are
@@ -72,8 +60,6 @@
 
 Reported Issues:
  * Request to be able to sort tables by size.
- * Hangs on showing large arrays, where hat does not hang.
-   - Solution is probably to not show all the array elements by default.
 
 Perflib Requests:
  * Class objects should have java.lang.Class as their class object, not null.
diff --git a/tools/ahat/src/AhatHandler.java b/tools/ahat/src/AhatHandler.java
index 2da02f8..d4b4d1b 100644
--- a/tools/ahat/src/AhatHandler.java
+++ b/tools/ahat/src/AhatHandler.java
@@ -16,51 +16,17 @@
 
 package com.android.ahat;
 
-import com.sun.net.httpserver.HttpExchange;
-import com.sun.net.httpserver.HttpHandler;
 import java.io.IOException;
-import java.io.PrintStream;
 
 /**
  * AhatHandler.
  *
- * Common base class of all the ahat HttpHandlers.
+ * Interface for an ahat page handler.
  */
-abstract class AhatHandler implements HttpHandler {
+interface AhatHandler {
 
-  protected AhatSnapshot mSnapshot;
-
-  public AhatHandler(AhatSnapshot snapshot) {
-    mSnapshot = snapshot;
-  }
-
-  public abstract void handle(Doc doc, Query query) throws IOException;
-
-  @Override
-  public void handle(HttpExchange exchange) throws IOException {
-    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
-    exchange.sendResponseHeaders(200, 0);
-    PrintStream ps = new PrintStream(exchange.getResponseBody());
-    try {
-      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
-      DocString menu = new DocString();
-      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("roots"), DocString.text("roots"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
-      menu.append(" - ");
-      menu.appendLink(DocString.uri("help"), DocString.text("help"));
-      doc.menu(menu);
-      handle(doc, new Query(exchange.getRequestURI()));
-      doc.close();
-    } catch (RuntimeException e) {
-      // Print runtime exceptions to standard error for debugging purposes,
-      // because otherwise they are swallowed and not reported.
-      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
-      e.printStackTrace();
-      throw e;
-    }
-    ps.close();
-  }
+  /**
+   * Handle the given query, rendering the page to the given document.
+   */
+  void handle(Doc doc, Query query) throws IOException;
 }
diff --git a/tools/ahat/src/AhatHttpHandler.java b/tools/ahat/src/AhatHttpHandler.java
new file mode 100644
index 0000000..0553713
--- /dev/null
+++ b/tools/ahat/src/AhatHttpHandler.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+import java.io.IOException;
+import java.io.PrintStream;
+
+/**
+ * AhatHttpHandler.
+ *
+ * HttpHandler for AhatHandlers.
+ */
+class AhatHttpHandler implements HttpHandler {
+
+  private AhatHandler mAhatHandler;
+
+  public AhatHttpHandler(AhatHandler handler) {
+    mAhatHandler = handler;
+  }
+
+  @Override
+  public void handle(HttpExchange exchange) throws IOException {
+    exchange.getResponseHeaders().add("Content-Type", "text/html;charset=utf-8");
+    exchange.sendResponseHeaders(200, 0);
+    PrintStream ps = new PrintStream(exchange.getResponseBody());
+    try {
+      HtmlDoc doc = new HtmlDoc(ps, DocString.text("ahat"), DocString.uri("style.css"));
+      DocString menu = new DocString();
+      menu.appendLink(DocString.uri("/"), DocString.text("overview"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("roots"), DocString.text("roots"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("sites"), DocString.text("allocations"));
+      menu.append(" - ");
+      menu.appendLink(DocString.uri("help"), DocString.text("help"));
+      doc.menu(menu);
+      mAhatHandler.handle(doc, new Query(exchange.getRequestURI()));
+      doc.close();
+    } catch (RuntimeException e) {
+      // Print runtime exceptions to standard error for debugging purposes,
+      // because otherwise they are swallowed and not reported.
+      System.err.println("Exception when handling " + exchange.getRequestURI() + ": ");
+      e.printStackTrace();
+      throw e;
+    }
+    ps.close();
+  }
+}
diff --git a/tools/ahat/src/Doc.java b/tools/ahat/src/Doc.java
index 7fa70de..5a70c4c 100644
--- a/tools/ahat/src/Doc.java
+++ b/tools/ahat/src/Doc.java
@@ -25,27 +25,27 @@
   /**
    * Output the title of the page.
    */
-  public void title(String format, Object... args);
+  void title(String format, Object... args);
 
   /**
    * Print a line of text for a page menu.
    */
-  public void menu(DocString string);
+  void menu(DocString string);
 
   /**
    * Start a new section with the given title.
    */
-  public void section(String title);
+  void section(String title);
 
   /**
    * Print a line of text in a normal font.
    */
-  public void println(DocString string);
+  void println(DocString string);
 
   /**
    * Print a line of text in a large font that is easy to see and click on.
    */
-  public void big(DocString string);
+  void big(DocString string);
 
   /**
    * Start a table with the given columns.
@@ -55,7 +55,7 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(Column... columns);
+  void table(Column... columns);
 
   /**
    * Start a table with the following heading structure:
@@ -68,14 +68,14 @@
    * This should be followed by calls to the 'row' method to fill in the table
    * contents and the 'end' method to end the table.
    */
-  public void table(DocString description, List<Column> subcols, List<Column> cols);
+  void table(DocString description, List<Column> subcols, List<Column> cols);
 
   /**
    * Add a row to the currently active table.
    * The number of values must match the number of columns provided for the
    * currently active table.
    */
-  public void row(DocString... values);
+  void row(DocString... values);
 
   /**
    * Start a new description list.
@@ -83,15 +83,15 @@
    * This should be followed by calls to description() and finally a call to
    * end().
    */
-  public void descriptions();
+  void descriptions();
 
   /**
    * Add a description to the currently active description list.
    */
-  public void description(DocString key, DocString value);
+  void description(DocString key, DocString value);
 
   /**
    * End the currently active table or description list.
    */
-  public void end();
+  void end();
 }
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 123d8be..34a5665 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -21,71 +21,35 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 /**
  * Class for rendering a list of instances dominated by a single instance in a
  * pretty way.
  */
 class DominatedList {
-  private static final int kIncrAmount = 100;
-  private static final int kDefaultShown = 100;
-
   /**
    * Render a table to the given HtmlWriter showing a pretty list of
    * instances.
    *
-   * Rather than show all of the instances (which may be very many), we use
-   * the query parameter "dominated" to specify a limited number of
-   * instances to show. The 'uri' parameter should be the current page URI, so
-   * that we can add links to "show more" and "show less" objects that go to
-   * the same page with only the number of objects adjusted.
+   * @param snapshot  the snapshot where the instances reside
+   * @param doc       the document to render the dominated list to
+   * @param query     the current page query
+   * @param id        a unique identifier to use for the dominated list in the current page
+   * @param instances the collection of instances to generate a list for
    */
-  public static void render(final AhatSnapshot snapshot, Doc doc,
-      Collection<Instance> instances, Query query) {
+  public static void render(final AhatSnapshot snapshot,
+      Doc doc, Query query, String id, Collection<Instance> instances) {
     List<Instance> insts = new ArrayList<Instance>(instances);
     Collections.sort(insts, Sort.defaultInstanceCompare(snapshot));
-
-    int numInstancesToShow = getNumInstancesToShow(query, insts.size());
-    List<Instance> shown = new ArrayList<Instance>(insts.subList(0, numInstancesToShow));
-    List<Instance> hidden = insts.subList(numInstancesToShow, insts.size());
-
-    // Add 'null' as a marker for "all the rest of the objects".
-    if (!hidden.isEmpty()) {
-      shown.add(null);
-    }
-    HeapTable.render(doc, new TableConfig(snapshot, hidden), snapshot, shown);
-
-    if (insts.size() > kDefaultShown) {
-      printMenu(doc, query, numInstancesToShow, insts.size());
-    }
+    HeapTable.render(doc, query, id, new TableConfig(snapshot), snapshot, insts);
   }
 
   private static class TableConfig implements HeapTable.TableConfig<Instance> {
     AhatSnapshot mSnapshot;
 
-    // Map from heap name to the total size of the instances not shown in the
-    // table.
-    Map<Heap, Long> mHiddenSizes;
-
-    public TableConfig(AhatSnapshot snapshot, List<Instance> hidden) {
+    public TableConfig(AhatSnapshot snapshot) {
       mSnapshot = snapshot;
-      mHiddenSizes = new HashMap<Heap, Long>();
-      for (Heap heap : snapshot.getHeaps()) {
-        mHiddenSizes.put(heap, 0L);
-      }
-
-      if (!hidden.isEmpty()) {
-        for (Instance inst : hidden) {
-          for (Heap heap : snapshot.getHeaps()) {
-            int index = snapshot.getHeapIndex(heap);
-            long size = inst.getRetainedSize(index);
-            mHiddenSizes.put(heap, mHiddenSizes.get(heap) + size);
-          }
-        }
-      }
     }
 
     @Override
@@ -95,9 +59,6 @@
 
     @Override
     public long getSize(Instance element, Heap heap) {
-      if (element == null) {
-        return mHiddenSizes.get(heap);
-      }
       int index = mSnapshot.getHeapIndex(heap);
       return element.getRetainedSize(index);
     }
@@ -110,56 +71,10 @@
         }
 
         public DocString render(Instance element) {
-          if (element == null) {
-            return DocString.text("...");
-          } else {
-            return Value.render(element);
-          }
+          return Value.render(element);
         }
       };
       return Collections.singletonList(value);
     }
   }
-
-  // Figure out how many objects to show based on the query parameter.
-  // The resulting value is guaranteed to be at least zero, and no greater
-  // than the number of total objects.
-  private static int getNumInstancesToShow(Query query, int totalNumInstances) {
-    String value = query.get("dominated", null);
-    try {
-      int count = Math.min(totalNumInstances, Integer.parseInt(value));
-      return Math.max(0, count);
-    } catch (NumberFormatException e) {
-      // We can't parse the value as a number. Ignore it.
-    }
-    return Math.min(kDefaultShown, totalNumInstances);
-  }
-
-  // Print a menu line after the table to control how many objects are shown.
-  // It has the form:
-  //  (showing X of Y objects - show none - show less - show more - show all)
-  private static void printMenu(Doc doc, Query query, int shown, int all) {
-    DocString menu = new DocString();
-    menu.appendFormat("(%d of %d objects shown - ", shown, all);
-    if (shown > 0) {
-      int less = Math.max(0, shown - kIncrAmount);
-      menu.appendLink(query.with("dominated", 0), DocString.text("show none"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", less), DocString.text("show less"));
-      menu.append(" - ");
-    } else {
-      menu.append("show none - show less - ");
-    }
-    if (shown < all) {
-      int more = Math.min(shown + kIncrAmount, all);
-      menu.appendLink(query.with("dominated", more), DocString.text("show more"));
-      menu.append(" - ");
-      menu.appendLink(query.with("dominated", all), DocString.text("show all"));
-      menu.append(")");
-    } else {
-      menu.append("show more - show all)");
-    }
-    doc.println(menu);
-  }
 }
-
diff --git a/tools/ahat/src/HeapTable.java b/tools/ahat/src/HeapTable.java
index 37d5816..ed11d17 100644
--- a/tools/ahat/src/HeapTable.java
+++ b/tools/ahat/src/HeapTable.java
@@ -18,7 +18,9 @@
 
 import com.android.tools.perflib.heap.Heap;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Class for rendering a table that includes sizes of some kind for each heap.
@@ -27,22 +29,27 @@
   /**
    * Configuration for a value column of a heap table.
    */
-  public static interface ValueConfig<T> {
-    public String getDescription();
-    public DocString render(T element);
+  public interface ValueConfig<T> {
+    String getDescription();
+    DocString render(T element);
   }
 
   /**
    * Configuration for the HeapTable.
    */
-  public static interface TableConfig<T> {
-    public String getHeapsDescription();
-    public long getSize(T element, Heap heap);
-    public List<ValueConfig<T>> getValueConfigs();
+  public interface TableConfig<T> {
+    String getHeapsDescription();
+    long getSize(T element, Heap heap);
+    List<ValueConfig<T>> getValueConfigs();
   }
 
-  public static <T> void render(Doc doc, TableConfig<T> config,
-      AhatSnapshot snapshot, List<T> elements) {
+  /**
+   * Render the table to the given document.
+   * @param query - The page query.
+   * @param id - A unique identifier for the table on the page.
+   */
+  public static <T> void render(Doc doc, Query query, String id,
+      TableConfig<T> config, AhatSnapshot snapshot, List<T> elements) {
     // Only show the heaps that have non-zero entries.
     List<Heap> heaps = new ArrayList<Heap>();
     for (Heap heap : snapshot.getHeaps()) {
@@ -68,9 +75,10 @@
     }
     doc.table(DocString.text(config.getHeapsDescription()), subcols, cols);
 
-    // Print the entries.
+    // Print the entries up to the selected limit.
+    SubsetSelector<T> selector = new SubsetSelector(query, id, elements);
     ArrayList<DocString> vals = new ArrayList<DocString>();
-    for (T elem : elements) {
+    for (T elem : selector.selected()) {
       vals.clear();
       long total = 0;
       for (Heap heap : heaps) {
@@ -87,7 +95,39 @@
       }
       doc.row(vals.toArray(new DocString[0]));
     }
+
+    // Print a summary of the remaining entries if there are any.
+    List<T> remaining = selector.remaining();
+    if (!remaining.isEmpty()) {
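+      // Sum the per-heap sizes of the hidden elements into a single "..." row.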
+      Map<Heap, Long> summary = new HashMap<Heap, Long>();
+      for (Heap heap : heaps) {
+        summary.put(heap, 0L);
+      }
+
+      for (T elem : remaining) {
+        for (Heap heap : heaps) {
+          summary.put(heap, summary.get(heap) + config.getSize(elem, heap));
+        }
+      }
+
+      vals.clear();
+      long total = 0;
+      for (Heap heap : heaps) {
+        long size = summary.get(heap);
+        total += size;
+        vals.add(DocString.format("%,14d", size));
+      }
+      if (showTotal) {
+        vals.add(DocString.format("%,14d", total));
+      }
+
+      for (ValueConfig<T> value : values) {
+        vals.add(DocString.text("..."));
+      }
+      doc.row(vals.toArray(new DocString[0]));
+    }
     doc.end();
+    selector.render(doc);
   }
 
   // Returns true if the given heap has a non-zero size entry.
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index 1563aa0..96fc53b 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -73,11 +73,11 @@
     InetAddress loopback = InetAddress.getLoopbackAddress();
     InetSocketAddress addr = new InetSocketAddress(loopback, port);
     HttpServer server = HttpServer.create(addr, 0);
-    server.createContext("/", new OverviewHandler(ahat, hprof));
-    server.createContext("/roots", new RootsHandler(ahat));
-    server.createContext("/object", new ObjectHandler(ahat));
-    server.createContext("/objects", new ObjectsHandler(ahat));
-    server.createContext("/site", new SiteHandler(ahat));
+    server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
+    server.createContext("/roots", new AhatHttpHandler(new RootsHandler(ahat)));
+    server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
+    server.createContext("/objects", new AhatHttpHandler(new ObjectsHandler(ahat)));
+    server.createContext("/site", new AhatHttpHandler(new SiteHandler(ahat)));
     server.createContext("/bitmap", new BitmapHandler(ahat));
     server.createContext("/help", new StaticHandler("help.html", "text/html"));
     server.createContext("/style.css", new StaticHandler("style.css", "text/css"));
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 5e321e2..9e4ce56 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -25,13 +25,26 @@
 import com.android.tools.perflib.heap.RootObj;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 
-class ObjectHandler extends AhatHandler {
+class ObjectHandler implements AhatHandler {
+
+  private static final String ARRAY_ELEMENTS_ID = "elements";
+  private static final String DOMINATOR_PATH_ID = "dompath";
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String DOMINATED_OBJECTS_ID = "dominated";
+  private static final String INSTANCE_FIELDS_ID = "ifields";
+  private static final String STATIC_FIELDS_ID = "sfields";
+  private static final String HARD_REFS_ID = "refs";
+  private static final String SOFT_REFS_ID = "srefs";
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -46,8 +59,8 @@
     doc.title("Object %08x", inst.getUniqueId());
     doc.big(Value.render(inst));
 
-    printAllocationSite(doc, inst);
-    printDominatorPath(doc, inst);
+    printAllocationSite(doc, query, inst);
+    printDominatorPath(doc, query, inst);
 
     doc.section("Object Info");
     ClassObj cls = inst.getClassObj();
@@ -62,39 +75,46 @@
 
     printBitmap(doc, inst);
     if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, (ClassInstance)inst);
+      printClassInstanceFields(doc, query, (ClassInstance)inst);
     } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, (ArrayInstance)inst);
+      printArrayElements(doc, query, (ArrayInstance)inst);
     } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, (ClassObj)inst);
+      printClassInfo(doc, query, (ClassObj)inst);
     }
-    printReferences(doc, inst);
+    printReferences(doc, query, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(Doc doc, ClassInstance inst) {
+  private static void printClassInstanceFields(Doc doc, Query query, ClassInstance inst) {
     doc.section("Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (ClassInstance.FieldValue field : inst.getValues()) {
+    SubsetSelector<ClassInstance.FieldValue> selector
+      = new SubsetSelector(query, INSTANCE_FIELDS_ID, inst.getValues());
+    for (ClassInstance.FieldValue field : selector.selected()) {
       doc.row(
           DocString.text(field.getField().getType().toString()),
           DocString.text(field.getField().getName()),
           Value.render(field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printArrayElements(Doc doc, ArrayInstance array) {
+  private static void printArrayElements(Doc doc, Query query, ArrayInstance array) {
     doc.section("Array Elements");
     doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
-    Object[] elements = array.getValues();
-    for (int i = 0; i < elements.length; i++) {
-      doc.row(DocString.format("%d", i), Value.render(elements[i]));
+    List<Object> elements = Arrays.asList(array.getValues());
+    SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
+    int i = 0;
+    for (Object elem : selector.selected()) {
+      doc.row(DocString.format("%d", i), Value.render(elem));
+      i++;
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printClassInfo(Doc doc, ClassObj clsobj) {
+  private static void printClassInfo(Doc doc, Query query, ClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
     doc.description(DocString.text("Super Class"), Value.render(clsobj.getSuperClassObj()));
@@ -103,41 +123,52 @@
 
     doc.section("Static Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
-    for (Map.Entry<Field, Object> field : clsobj.getStaticFieldValues().entrySet()) {
+    List<Map.Entry<Field, Object>> fields
+      = new ArrayList<Map.Entry<Field, Object>>(clsobj.getStaticFieldValues().entrySet());
+    SubsetSelector<Map.Entry<Field, Object>> selector
+      = new SubsetSelector(query, STATIC_FIELDS_ID, fields);
+    for (Map.Entry<Field, Object> field : selector.selected()) {
       doc.row(
           DocString.text(field.getKey().getType().toString()),
           DocString.text(field.getKey().getName()),
           Value.render(field.getValue()));
     }
     doc.end();
+    selector.render(doc);
   }
 
-  private static void printReferences(Doc doc, Instance inst) {
+  private static void printReferences(Doc doc, Query query, Instance inst) {
     doc.section("Objects with References to this Object");
     if (inst.getHardReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
     } else {
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getHardReferences()) {
+      List<Instance> references = inst.getHardReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
         doc.row(Value.render(ref));
       }
       doc.end();
+      selector.render(doc);
     }
 
     if (inst.getSoftReferences() != null) {
       doc.section("Objects with Soft References to this Object");
       doc.table(new Column("Object"));
-      for (Instance ref : inst.getSoftReferences()) {
-        doc.row(Value.render(inst));
+      List<Instance> references = inst.getSoftReferences();
+      SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
+      for (Instance ref : selector.selected()) {
+        doc.row(Value.render(ref));
       }
       doc.end();
+      selector.render(doc);
     }
   }
 
-  private void printAllocationSite(Doc doc, Instance inst) {
+  private void printAllocationSite(Doc doc, Query query, Instance inst) {
     doc.section("Allocation Site");
     Site site = mSnapshot.getSiteForInstance(inst);
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
   }
 
   // Draw the bitmap corresponding to this instance if there is one.
@@ -150,7 +181,7 @@
     }
   }
 
-  private void printDominatorPath(Doc doc, Instance inst) {
+  private void printDominatorPath(Doc doc, Query query, Instance inst) {
     doc.section("Dominator Path from Root");
     List<Instance> path = new ArrayList<Instance>();
     for (Instance parent = inst;
@@ -193,14 +224,14 @@
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, mSnapshot, path);
+    HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
   }
 
   public void printDominatedObjects(Doc doc, Query query, Instance inst) {
     doc.section("Immediately Dominated Objects");
     List<Instance> instances = mSnapshot.getDominated(inst);
     if (instances != null) {
-      DominatedList.render(mSnapshot, doc, instances, query);
+      DominatedList.render(mSnapshot, doc, query, DOMINATED_OBJECTS_ID, instances);
     } else {
       doc.println(DocString.text("(none)"));
     }
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 4e9c42e..8ad3f48 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -22,9 +22,13 @@
 import java.util.Collections;
 import java.util.List;
 
-class ObjectsHandler extends AhatHandler {
+class ObjectsHandler implements AhatHandler {
+  private static final String OBJECTS_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public ObjectsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -51,13 +55,15 @@
         new Column("Size", Column.Align.RIGHT),
         new Column("Heap"),
         new Column("Object"));
-    for (Instance inst : insts) {
+    SubsetSelector<Instance> selector = new SubsetSelector(query, OBJECTS_ID, insts);
+    for (Instance inst : selector.selected()) {
       doc.row(
           DocString.format("%,d", inst.getSize()),
           DocString.text(inst.getHeap().getName()),
           Value.render(inst));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index f49c009..e86679f 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -22,11 +22,15 @@
 import java.util.Collections;
 import java.util.List;
 
-class OverviewHandler extends AhatHandler {
+class OverviewHandler implements AhatHandler {
+
+  private static final String OVERVIEW_ID = "overview";
+
+  private AhatSnapshot mSnapshot;
   private File mHprof;
 
   public OverviewHandler(AhatSnapshot snapshot, File hprof) {
-    super(snapshot);
+    mSnapshot = snapshot;
     mHprof = hprof;
   }
 
@@ -43,7 +47,7 @@
     doc.end();
 
     doc.section("Heap Sizes");
-    printHeapSizes(doc);
+    printHeapSizes(doc, query);
 
     DocString menu = new DocString();
     menu.appendLink(DocString.uri("roots"), DocString.text("Roots"));
@@ -54,7 +58,7 @@
     doc.big(menu);
   }
 
-  private void printHeapSizes(Doc doc) {
+  private void printHeapSizes(Doc doc, Query query) {
     List<Object> dummy = Collections.singletonList(null);
 
     HeapTable.TableConfig<Object> table = new HeapTable.TableConfig<Object>() {
@@ -70,7 +74,7 @@
         return Collections.emptyList();
       }
     };
-    HeapTable.render(doc, table, mSnapshot, dummy);
+    HeapTable.render(doc, query, OVERVIEW_ID, table, mSnapshot, dummy);
   }
 }
 
diff --git a/tools/ahat/src/RootsHandler.java b/tools/ahat/src/RootsHandler.java
index 185b9bf..2a92c90 100644
--- a/tools/ahat/src/RootsHandler.java
+++ b/tools/ahat/src/RootsHandler.java
@@ -24,9 +24,14 @@
 import java.util.List;
 import java.util.Set;
 
-class RootsHandler extends AhatHandler {
+class RootsHandler implements AhatHandler {
+
+  private static final String ROOTS_ID = "roots";
+
+  private AhatSnapshot mSnapshot;
+
   public RootsHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -45,7 +50,7 @@
     for (Instance inst : rootset) {
       roots.add(inst);
     }
-    DominatedList.render(mSnapshot, doc, roots, query);
+    DominatedList.render(mSnapshot, doc, query, ROOTS_ID, roots);
   }
 }
 
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 0a9381e..0425a5a 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -22,9 +22,15 @@
 import java.util.Comparator;
 import java.util.List;
 
-class SiteHandler extends AhatHandler {
+class SiteHandler implements AhatHandler {
+  private static final String ALLOCATION_SITE_ID = "frames";
+  private static final String SITES_CALLED_ID = "called";
+  private static final String OBJECTS_ALLOCATED_ID = "objects";
+
+  private AhatSnapshot mSnapshot;
+
   public SiteHandler(AhatSnapshot snapshot) {
-    super(snapshot);
+    mSnapshot = snapshot;
   }
 
   @Override
@@ -35,7 +41,7 @@
 
     doc.title("Site %s", site.getName());
     doc.section("Allocation Site");
-    SitePrinter.printSite(doc, mSnapshot, site);
+    SitePrinter.printSite(mSnapshot, doc, query, ALLOCATION_SITE_ID, site);
 
     doc.section("Sites Called from Here");
     List<Site> children = site.getChildren();
@@ -69,7 +75,7 @@
           return Collections.singletonList(value);
         }
       };
-      HeapTable.render(doc, table, mSnapshot, children);
+      HeapTable.render(doc, query, SITES_CALLED_ID, table, mSnapshot, children);
     }
 
     doc.section("Objects Allocated");
@@ -84,7 +90,9 @@
         new Sort.ObjectsInfoBySize(),
         new Sort.ObjectsInfoByClassName());
     Collections.sort(infos, compare);
-    for (Site.ObjectsInfo info : infos) {
+    SubsetSelector<Site.ObjectsInfo> selector
+      = new SubsetSelector(query, OBJECTS_ALLOCATED_ID, infos);
+    for (Site.ObjectsInfo info : selector.selected()) {
       String className = AhatSnapshot.getClassName(info.classObj);
       doc.row(
           DocString.format("%,14d", info.numBytes),
@@ -96,6 +104,7 @@
           Value.render(info.classObj));
     }
     doc.end();
+    selector.render(doc);
   }
 }
 
diff --git a/tools/ahat/src/SitePrinter.java b/tools/ahat/src/SitePrinter.java
index be87032..2c06b47 100644
--- a/tools/ahat/src/SitePrinter.java
+++ b/tools/ahat/src/SitePrinter.java
@@ -22,7 +22,7 @@
 import java.util.List;
 
 class SitePrinter {
-  public static void printSite(Doc doc, AhatSnapshot snapshot, Site site) {
+  public static void printSite(AhatSnapshot snapshot, Doc doc, Query query, String id, Site site) {
     List<Site> path = new ArrayList<Site>();
     for (Site parent = site; parent != null; parent = parent.getParent()) {
       path.add(parent);
@@ -60,6 +60,6 @@
         return Collections.singletonList(value);
       }
     };
-    HeapTable.render(doc, table, snapshot, path);
+    HeapTable.render(doc, query, id, table, snapshot, path);
   }
 }
diff --git a/tools/ahat/src/SubsetSelector.java b/tools/ahat/src/SubsetSelector.java
new file mode 100644
index 0000000..79399c1
--- /dev/null
+++ b/tools/ahat/src/SubsetSelector.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import java.util.List;
+
+/**
+ * A SubsetSelector is a menu that can be added to a page to let the user
+ * select a limited number of elements to show.
+ * This is used to limit the number of elements shown on a page by default,
+ * requiring the user to explicitly request more, so users not interested in
+ * more don't have to wait for everything to render.
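+ *
+ * Typical usage, as in ObjectsHandler:
+ *   SubsetSelector<Instance> selector = new SubsetSelector(query, id, insts);
+ *   for (Instance inst : selector.selected()) { ... render a table row ... }
+ *   doc.end();
+ *   selector.render(doc);   // emits the "show more"/"show less" menu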
+ */
+class SubsetSelector<T> {
+  private static final int kIncrAmount = 1000;
+  private static final int kDefaultShown = 1000;
+
+  private Query mQuery;
+  private String mId;
+  private int mLimit;
+  private List<T> mElements;
+
+  /**
+   * @param query - the query for the current page. This is required so the
+   * SubsetSelector can add links to the same page with a modified limit
+   * selection.
+   * @param id - the name of the query parameter key that holds the
+   * selected limit value.
+   * @param elements - the elements to select from. The collection of elements
+   * should not be modified during the lifetime of the SubsetSelector object.
+   */
+  public SubsetSelector(Query query, String id, List<T> elements) {
+    mQuery = query;
+    mId = id;
+    mLimit = getSelectedLimit(query, id, elements.size());
+    mElements = elements;
+  }
+
+  // Return the list of elements included in the selected subset.
+  public List<T> selected() {
+    return mElements.subList(0, mLimit);
+  }
+
+  // Return the list of remaining elements not included in the selected subset.
+  public List<T> remaining() {
+    return mElements.subList(mLimit, mElements.size());
+  }
+
+  /**
+   * Returns the currently selected limit.
+   * @param query the current page query
+   * @param id the query parameter key that holds the selected limit
+   * @param size the total number of elements to select from
+   * @return the number of selected elements
+   */
+  private static int getSelectedLimit(Query query, String id, int size) {
+    String value = query.get(id, null);
+    try {
+      int ivalue = Math.min(size, Integer.parseInt(value));
+      return Math.max(0, ivalue);
+    } catch (NumberFormatException e) {
+      // We can't parse the value as a number. Ignore it.
+    }
+    return Math.min(kDefaultShown, size);
+  }
+
+  // Render the limit selector to the given doc.
+  // It has the form:
+  //  (showing X of Y - show none - show less - show more - show all)
+  public void render(Doc doc) {
+    int all = mElements.size();
+    if (all > kDefaultShown) {
+      DocString menu = new DocString();
+      menu.appendFormat("(%d of %d elements shown - ", mLimit, all);
+      if (mLimit > 0) {
+        int less = Math.max(0, mLimit - kIncrAmount);
+        menu.appendLink(mQuery.with(mId, 0), DocString.text("show none"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, less), DocString.text("show less"));
+        menu.append(" - ");
+      } else {
+        menu.append("show none - show less - ");
+      }
+      if (mLimit < all) {
+        int more = Math.min(mLimit + kIncrAmount, all);
+        menu.appendLink(mQuery.with(mId, more), DocString.text("show more"));
+        menu.append(" - ");
+        menu.appendLink(mQuery.with(mId, all), DocString.text("show all"));
+        menu.append(")");
+      } else {
+        menu.append("show more - show all)");
+      }
+      doc.println(menu);
+    }
+  }
+}
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 7b8774a..90cd7af 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -39,6 +39,15 @@
     public ReferenceQueue<Object> referenceQueue = new ReferenceQueue<Object>();
     public PhantomReference aPhantomReference = new PhantomReference(anObject, referenceQueue);
     public WeakReference aWeakReference = new WeakReference(anObject, referenceQueue);
+    public byte[] bigArray;
+
+    DumpedStuff() {
+      int N = 1000000;
+      bigArray = new byte[N];
+      for (int i = 0; i < N; i++) {
+        bigArray[i] = (byte)((i*i) & 0xFF);
+      }
+    }
   }
 
   public static void main(String[] args) throws IOException {
diff --git a/tools/ahat/test/PerformanceTest.java b/tools/ahat/test/PerformanceTest.java
new file mode 100644
index 0000000..6e46800
--- /dev/null
+++ b/tools/ahat/test/PerformanceTest.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.tools.perflib.heap.Instance;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+public class PerformanceTest {
+  private static class NullOutputStream extends OutputStream {
+    public void write(int b) throws IOException {
+    }
+  }
+
+  @Test
+  public void bigArray() throws IOException {
+    // It should not take more than 1 second to load the default object view
+    // for any object, including big arrays.
+    TestDump dump = TestDump.getTestDump();
+
+    Instance bigArray = (Instance)dump.getDumpedThing("bigArray");
+    assertNotNull(bigArray);
+
+    AhatSnapshot snapshot = dump.getAhatSnapshot();
+    AhatHandler handler = new ObjectHandler(snapshot);
+
+    PrintStream ps = new PrintStream(new NullOutputStream());
+    HtmlDoc doc = new HtmlDoc(ps, DocString.text("bigArray test"), DocString.uri("style.css"));
+    String uri = "http://localhost:7100/object?id=" + bigArray.getId();
+    Query query = new Query(DocString.uri(uri));
+
+    long start = System.currentTimeMillis();
+    handler.handle(doc, query);
+    long time = System.currentTimeMillis() - start;
+    assertTrue("bigArray took too long: " + time + "ms", time < 1000);
+  }
+}
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index bab7121..e8894e2 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -23,8 +23,9 @@
     if (args.length == 0) {
       args = new String[]{
         "com.android.ahat.InstanceUtilsTest",
+        "com.android.ahat.PerformanceTest",
         "com.android.ahat.QueryTest",
-        "com.android.ahat.SortTest"
+        "com.android.ahat.SortTest",
       };
     }
     JUnitCore.main(args);
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 631e0a0..047c24f 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -21,7 +21,7 @@
 
 out_dir=${OUT_DIR-out}
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
-common_targets="vogar vogar.jar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
+common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar"
 mode="target"
 j_arg="-j$(nproc)"
 showcommands=