Merge "inliner: Do not assume that the outermost_graph has an art method" into oc-dev
am: 055242be79

Change-Id: I1b5c377d7632015bc951256333a69d546bcf0f58
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 11af1c0..c4374f7 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -227,6 +227,8 @@
   $(TARGET_CORE_IMAGE_DEFAULT_64) \
   $(TARGET_CORE_IMAGE_DEFAULT_32) \
   oatdump
+ART_GTEST_oatdump_image_test_HOST_DEPS := $(ART_GTEST_oatdump_test_HOST_DEPS)
+ART_GTEST_oatdump_image_test_TARGET_DEPS := $(ART_GTEST_oatdump_test_TARGET_DEPS)
 
 # Profile assistant tests requires profman utility.
 ART_GTEST_profile_assistant_test_HOST_DEPS := \
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index d82fd48..32480dd 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -612,7 +612,7 @@
 template <typename TVariantMap,
           template <typename TKeyValue> class TVariantMapKey>
 template <typename TArg>
-CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>
+typename CmdlineParser<TVariantMap, TVariantMapKey>::template ArgumentBuilder<TArg>
 CmdlineParser<TVariantMap, TVariantMapKey>::CreateArgumentBuilder(
     CmdlineParser<TVariantMap, TVariantMapKey>::Builder& parent) {
   return CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>(
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 6ef866a..a2b07af 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -338,6 +338,7 @@
         "elf_writer_test.cc",
         "exception_test.cc",
         "image_test.cc",
+        "image_write_read_test.cc",
         "jni/jni_compiler_test.cc",
         "linker/multi_oat_relative_patcher_test.cc",
         "linker/output_stream_test.cc",
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 874e357..fbab9df 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -179,6 +179,40 @@
                                      uint16_t class_def_index,
                                      bool requires)
       REQUIRES(!requires_constructor_barrier_lock_);
+
+  // Do the <init> methods for this class require a constructor barrier (prior to the return)?
+  // The answer is "yes", if and only if this class has any instance final fields.
+  // (This must not be called for any non-<init> methods; the answer would be "no").
+  //
+  // ---
+  //
+  // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end
+  // of the invoked constructor. The constructor barrier is a conservative implementation means of
+  // enforcing the freezes happen-before the object being constructed is observable by another
+  // thread.
+  //
+  // Note: This question only makes sense for instance constructors;
+  // static constructors (despite possibly having finals) never need
+  // a barrier.
+  //
+  // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes
+  // class initialization as:
+  //
+  //   lock(class.lock)
+  //     class.state = initializing
+  //   unlock(class.lock)
+  //
+  //   invoke <clinit>
+  //
+  //   lock(class.lock)
+  //     class.state = initialized
+  //   unlock(class.lock)              <-- acts as a release
+  //
+  // The last operation in the above example acts as an atomic release
+  // for any stores in <clinit>, which ends up being stricter
+  // than what a constructor barrier needs.
+  //
+  // See also QuasiAtomic::ThreadFenceForConstructor().
   bool RequiresConstructorBarrier(Thread* self,
                                   const DexFile* dex_file,
                                   uint16_t class_def_index)
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a0c0a2a..a4e2083 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -200,7 +200,7 @@
     ParseDumpInitFailures(option, Usage);
   } else if (option.starts_with("--dump-cfg=")) {
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-  } else if (option.starts_with("--dump-cfg-append")) {
+  } else if (option == "--dump-cfg-append") {
     dump_cfg_append_ = true;
   } else if (option.starts_with("--register-allocation-strategy=")) {
     ParseRegisterAllocationStrategy(option, Usage);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e53d8d..9d7aff7 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -14,492 +14,17 @@
  * limitations under the License.
  */
 
-#include "image.h"
-
-#include <memory>
-#include <string>
+#include <string.h>
 #include <vector>
 
-#include "android-base/stringprintf.h"
+#include "image_test.h"
 
-#include "art_method-inl.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker-inl.h"
-#include "compiler_callbacks.h"
-#include "common_compiler_test.h"
-#include "debug/method_debug_info.h"
-#include "dex/quick_compiler_callbacks.h"
-#include "driver/compiler_options.h"
-#include "elf_writer.h"
-#include "elf_writer_quick.h"
-#include "gc/space/image_space.h"
-#include "image_writer.h"
-#include "linker/buffered_output_stream.h"
-#include "linker/file_output_stream.h"
-#include "linker/multi_oat_relative_patcher.h"
-#include "lock_word.h"
-#include "mirror/object-inl.h"
-#include "oat_writer.h"
+#include "image.h"
 #include "scoped_thread_state_change-inl.h"
-#include "signal_catcher.h"
-#include "utils.h"
+#include "thread.h"
 
 namespace art {
 
-static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
-
-struct CompilationHelper {
-  std::vector<std::string> dex_file_locations;
-  std::vector<ScratchFile> image_locations;
-  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
-  std::vector<ScratchFile> image_files;
-  std::vector<ScratchFile> oat_files;
-  std::vector<ScratchFile> vdex_files;
-  std::string image_dir;
-
-  void Compile(CompilerDriver* driver,
-               ImageHeader::StorageMode storage_mode);
-
-  std::vector<size_t> GetImageObjectSectionSizes();
-
-  ~CompilationHelper();
-};
-
-class ImageTest : public CommonCompilerTest {
- protected:
-  virtual void SetUp() {
-    ReserveImageSpace();
-    CommonCompilerTest::SetUp();
-  }
-
-  void TestWriteRead(ImageHeader::StorageMode storage_mode);
-
-  void Compile(ImageHeader::StorageMode storage_mode,
-               CompilationHelper& out_helper,
-               const std::string& extra_dex = "",
-               const std::initializer_list<std::string>& image_classes = {});
-
-  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
-    CommonCompilerTest::SetUpRuntimeOptions(options);
-    callbacks_.reset(new QuickCompilerCallbacks(
-        verification_results_.get(),
-        CompilerCallbacks::CallbackMode::kCompileBootImage));
-    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-  }
-
-  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
-    return new std::unordered_set<std::string>(image_classes_);
-  }
-
-  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    PointerSize pointer_size = class_linker_->GetImagePointerSize();
-    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
-      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
-          origin->GetSignature() == m.GetSignature()) {
-        return &m;
-      }
-    }
-    return nullptr;
-  }
-
- private:
-  std::unordered_set<std::string> image_classes_;
-};
-
-CompilationHelper::~CompilationHelper() {
-  for (ScratchFile& image_file : image_files) {
-    image_file.Unlink();
-  }
-  for (ScratchFile& oat_file : oat_files) {
-    oat_file.Unlink();
-  }
-  for (ScratchFile& vdex_file : vdex_files) {
-    vdex_file.Unlink();
-  }
-  const int rmdir_result = rmdir(image_dir.c_str());
-  CHECK_EQ(0, rmdir_result);
-}
-
-std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
-  std::vector<size_t> ret;
-  for (ScratchFile& image_file : image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    CHECK(file.get() != nullptr);
-    ImageHeader image_header;
-    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    CHECK(image_header.IsValid());
-    ret.push_back(image_header.GetImageSize());
-  }
-  return ret;
-}
-
-void CompilationHelper::Compile(CompilerDriver* driver,
-                                ImageHeader::StorageMode storage_mode) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
-
-  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      // Inject in boot class path so that the compiler driver can see it.
-      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
-    }
-    class_path.push_back(dex_file.get());
-  }
-
-  // Enable write for dex2dex.
-  for (const DexFile* dex_file : class_path) {
-    dex_file_locations.push_back(dex_file->GetLocation());
-    if (dex_file->IsReadOnly()) {
-      dex_file->EnableWrite();
-    }
-  }
-  {
-    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
-    ScratchFile location;
-    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
-      std::string cur_location =
-          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
-      image_locations.push_back(ScratchFile(cur_location));
-    }
-  }
-  std::vector<std::string> image_filenames;
-  for (ScratchFile& file : image_locations) {
-    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
-    image_filenames.push_back(image_filename);
-    size_t pos = image_filename.rfind('/');
-    CHECK_NE(pos, std::string::npos) << image_filename;
-    if (image_dir.empty()) {
-      image_dir = image_filename.substr(0, pos);
-      int mkdir_result = mkdir(image_dir.c_str(), 0700);
-      CHECK_EQ(0, mkdir_result) << image_dir;
-    }
-    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
-  }
-
-  std::vector<std::string> oat_filenames;
-  std::vector<std::string> vdex_filenames;
-  for (const std::string& image_filename : image_filenames) {
-    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
-    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
-    oat_filenames.push_back(oat_filename);
-    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
-    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
-    vdex_filenames.push_back(vdex_filename);
-  }
-
-  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
-  std::vector<const char*> oat_filename_vector;
-  for (const std::string& file : oat_filenames) {
-    oat_filename_vector.push_back(file.c_str());
-  }
-  std::vector<const char*> image_filename_vector;
-  for (const std::string& file : image_filenames) {
-    image_filename_vector.push_back(file.c_str());
-  }
-  size_t image_idx = 0;
-  for (const DexFile* dex_file : class_path) {
-    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
-    ++image_idx;
-  }
-  // TODO: compile_pic should be a test argument.
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
-                                                      kRequestedImageBase,
-                                                      /*compile_pic*/false,
-                                                      /*compile_app_image*/false,
-                                                      storage_mode,
-                                                      oat_filename_vector,
-                                                      dex_file_to_oat_index_map));
-  {
-    {
-      jobject class_loader = nullptr;
-      TimingLogger timings("ImageTest::WriteRead", false, false);
-      TimingLogger::ScopedTiming t("CompileAll", &timings);
-      driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
-
-      t.NewTiming("WriteElf");
-      SafeMap<std::string, std::string> key_value_store;
-      std::vector<const char*> dex_filename_vector;
-      for (size_t i = 0; i < class_path.size(); ++i) {
-        dex_filename_vector.push_back("");
-      }
-      key_value_store.Put(OatHeader::kBootClassPathKey,
-                          gc::space::ImageSpace::GetMultiImageBootClassPath(
-                              dex_filename_vector,
-                              oat_filename_vector,
-                              image_filename_vector));
-
-      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
-      std::vector<std::unique_ptr<OatWriter>> oat_writers;
-      for (ScratchFile& oat_file : oat_files) {
-        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
-                                                      driver->GetInstructionSetFeatures(),
-                                                      &driver->GetCompilerOptions(),
-                                                      oat_file.GetFile()));
-        elf_writers.back()->Start();
-        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
-                                               &timings,
-                                               /*profile_compilation_info*/nullptr));
-      }
-
-      std::vector<OutputStream*> rodata;
-      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
-      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
-      // Now that we have finalized key_value_store_, start writing the oat file.
-      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
-        const DexFile* dex_file = class_path[i];
-        rodata.push_back(elf_writers[i]->StartRoData());
-        ArrayRef<const uint8_t> raw_dex_file(
-            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
-            dex_file->GetHeader().file_size_);
-        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
-                                            dex_file->GetLocation().c_str(),
-                                            dex_file->GetLocationChecksum());
-
-        std::unique_ptr<MemMap> cur_opened_dex_files_map;
-        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
-        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
-            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
-            rodata.back(),
-            driver->GetInstructionSet(),
-            driver->GetInstructionSetFeatures(),
-            &key_value_store,
-            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
-            /* update_input_vdex */ false,
-            &cur_opened_dex_files_map,
-            &cur_opened_dex_files);
-        ASSERT_TRUE(dex_files_ok);
-
-        if (cur_opened_dex_files_map != nullptr) {
-          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
-          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
-            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
-            opened_dex_files.push_back(std::move(cur_dex_file));
-          }
-        } else {
-          ASSERT_TRUE(cur_opened_dex_files.empty());
-        }
-      }
-      bool image_space_ok = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(image_space_ok);
-
-      if (kIsVdexEnabled) {
-        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-          std::unique_ptr<BufferedOutputStream> vdex_out(
-              MakeUnique<BufferedOutputStream>(
-                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
-          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
-          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
-        }
-      }
-
-      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
-        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
-                                                driver->GetInstructionSetFeatures());
-        OatWriter* const oat_writer = oat_writers[i].get();
-        ElfWriter* const elf_writer = elf_writers[i].get();
-        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
-        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
-        oat_writer->PrepareLayout(&patcher);
-        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-        size_t text_size = oat_writer->GetOatSize() - rodata_size;
-        elf_writer->PrepareDynamicSection(rodata_size,
-                                          text_size,
-                                          oat_writer->GetBssSize(),
-                                          oat_writer->GetBssRootsOffset());
-
-        writer->UpdateOatFileLayout(i,
-                                    elf_writer->GetLoadedSize(),
-                                    oat_writer->GetOatDataOffset(),
-                                    oat_writer->GetOatSize());
-
-        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
-        ASSERT_TRUE(rodata_ok);
-        elf_writer->EndRoData(rodata[i]);
-
-        OutputStream* text = elf_writer->StartText();
-        bool text_ok = oat_writer->WriteCode(text);
-        ASSERT_TRUE(text_ok);
-        elf_writer->EndText(text);
-
-        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
-        ASSERT_TRUE(header_ok);
-
-        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
-
-        elf_writer->WriteDynamicSection();
-        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-
-        bool success = elf_writer->End();
-        ASSERT_TRUE(success);
-      }
-    }
-
-    bool success_image = writer->Write(kInvalidFd,
-                                       image_filename_vector,
-                                       oat_filename_vector);
-    ASSERT_TRUE(success_image);
-
-    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
-      const char* oat_filename = oat_filenames[i].c_str();
-      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
-      ASSERT_TRUE(oat_file != nullptr);
-      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
-                                            writer->GetOatDataBegin(i));
-      ASSERT_TRUE(success_fixup);
-      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
-                                                  << oat_filename;
-    }
-  }
-}
-
-void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
-                        CompilationHelper& helper,
-                        const std::string& extra_dex,
-                        const std::initializer_list<std::string>& image_classes) {
-  for (const std::string& image_class : image_classes) {
-    image_classes_.insert(image_class);
-  }
-  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
-  // Set inline filter values.
-  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
-  image_classes_.clear();
-  if (!extra_dex.empty()) {
-    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
-  }
-  helper.Compile(compiler_driver_.get(), storage_mode);
-  if (image_classes.begin() != image_classes.end()) {
-    // Make sure the class got initialized.
-    ScopedObjectAccess soa(Thread::Current());
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    for (const std::string& image_class : image_classes) {
-      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
-      EXPECT_TRUE(klass != nullptr);
-      EXPECT_TRUE(klass->IsInitialized());
-    }
-  }
-}
-
-void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
-  CompilationHelper helper;
-  Compile(storage_mode, /*out*/ helper);
-  std::vector<uint64_t> image_file_sizes;
-  for (ScratchFile& image_file : helper.image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    ASSERT_TRUE(file.get() != nullptr);
-    ImageHeader image_header;
-    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    ASSERT_TRUE(image_header.IsValid());
-    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
-    ASSERT_NE(0U, bitmap_section.Size());
-
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_TRUE(heap->HaveContinuousSpaces());
-    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
-    ASSERT_FALSE(space->IsImageSpace());
-    ASSERT_TRUE(space != nullptr);
-    ASSERT_TRUE(space->IsMallocSpace());
-    image_file_sizes.push_back(file->GetLength());
-  }
-
-  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
-  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
-
-  // Need to delete the compiler since it has worker threads which are attached to runtime.
-  compiler_driver_.reset();
-
-  // Tear down old runtime before making a new one, clearing out misc state.
-
-  // Remove the reservation of the memory for use to load the image.
-  // Need to do this before we reset the runtime.
-  UnreserveImageSpace();
-
-  helper.extra_dex_files.clear();
-  runtime_.reset();
-  java_lang_dex_file_ = nullptr;
-
-  MemMap::Init();
-
-  RuntimeOptions options;
-  std::string image("-Ximage:");
-  image.append(helper.image_locations[0].GetFilename());
-  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
-  // By default the compiler this creates will not include patch information.
-  options.push_back(std::make_pair("-Xnorelocate", nullptr));
-
-  if (!Runtime::Create(options, false)) {
-    LOG(FATAL) << "Failed to create runtime";
-    return;
-  }
-  runtime_.reset(Runtime::Current());
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more managable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-  ScopedObjectAccess soa(Thread::Current());
-  ASSERT_TRUE(runtime_.get() != nullptr);
-  class_linker_ = runtime_->GetClassLinker();
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasBootImageSpace());
-  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
-
-  // We loaded the runtime with an explicit image, so it must exist.
-  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
-  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
-    std::unique_ptr<const DexFile> dex(
-        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
-    ASSERT_TRUE(dex != nullptr);
-    uint64_t image_file_size = image_file_sizes[i];
-    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
-    ASSERT_TRUE(image_space != nullptr);
-    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
-      // Uncompressed, image should be smaller than file.
-      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
-    } else if (image_file_size > 16 * KB) {
-      // Compressed, file should be smaller than image. Not really valid for small images.
-      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
-    }
-
-    image_space->VerifyImageAllocations();
-    uint8_t* image_begin = image_space->Begin();
-    uint8_t* image_end = image_space->End();
-    if (i == 0) {
-      // This check is only valid for image 0.
-      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
-    }
-    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
-      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
-      const char* descriptor = dex->GetClassDescriptor(class_def);
-      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
-      EXPECT_TRUE(klass != nullptr) << descriptor;
-      if (image_classes.find(descriptor) == image_classes.end()) {
-        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
-                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
-      } else {
-        // Image classes should be located inside the image.
-        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
-        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
-      }
-      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
-    }
-  }
-}
-
-TEST_F(ImageTest, WriteReadUncompressed) {
-  TestWriteRead(ImageHeader::kStorageModeUncompressed);
-}
-
-TEST_F(ImageTest, WriteReadLZ4) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4);
-}
-
-TEST_F(ImageTest, WriteReadLZ4HC) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
-}
-
 TEST_F(ImageTest, TestImageLayout) {
   std::vector<size_t> image_sizes;
   std::vector<size_t> image_sizes_extra;
diff --git a/compiler/image_test.h b/compiler/image_test.h
new file mode 100644
index 0000000..2f15ff4
--- /dev/null
+++ b/compiler/image_test.h
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_IMAGE_TEST_H_
+#define ART_COMPILER_IMAGE_TEST_H_
+
+#include "image.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "android-base/stringprintf.h"
+
+#include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker-inl.h"
+#include "compiler_callbacks.h"
+#include "common_compiler_test.h"
+#include "debug/method_debug_info.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "driver/compiler_options.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
+#include "gc/space/image_space.h"
+#include "image_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change-inl.h"
+#include "signal_catcher.h"
+#include "utils.h"
+
+namespace art {
+
+static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
+
+struct CompilationHelper {
+  std::vector<std::string> dex_file_locations;
+  std::vector<ScratchFile> image_locations;
+  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
+  std::vector<ScratchFile> image_files;
+  std::vector<ScratchFile> oat_files;
+  std::vector<ScratchFile> vdex_files;
+  std::string image_dir;
+
+  void Compile(CompilerDriver* driver,
+               ImageHeader::StorageMode storage_mode);
+
+  std::vector<size_t> GetImageObjectSectionSizes();
+
+  ~CompilationHelper();
+};
+
+class ImageTest : public CommonCompilerTest {
+ protected:
+  virtual void SetUp() {
+    ReserveImageSpace();
+    CommonCompilerTest::SetUp();
+  }
+
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
+
+  void Compile(ImageHeader::StorageMode storage_mode,
+               CompilationHelper& out_helper,
+               const std::string& extra_dex = "",
+               const std::initializer_list<std::string>& image_classes = {});
+
+  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+    CommonCompilerTest::SetUpRuntimeOptions(options);
+    callbacks_.reset(new QuickCompilerCallbacks(
+        verification_results_.get(),
+        CompilerCallbacks::CallbackMode::kCompileBootImage));
+    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+  }
+
+  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
+    return new std::unordered_set<std::string>(image_classes_);
+  }
+
+  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+          origin->GetSignature() == m.GetSignature()) {
+        return &m;
+      }
+    }
+    return nullptr;
+  }
+
+ private:
+  std::unordered_set<std::string> image_classes_;
+};
+
+inline CompilationHelper::~CompilationHelper() {
+  for (ScratchFile& image_file : image_files) {
+    image_file.Unlink();
+  }
+  for (ScratchFile& oat_file : oat_files) {
+    oat_file.Unlink();
+  }
+  for (ScratchFile& vdex_file : vdex_files) {
+    vdex_file.Unlink();
+  }
+  const int rmdir_result = rmdir(image_dir.c_str());
+  CHECK_EQ(0, rmdir_result);
+}
+
+inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
+  std::vector<size_t> ret;
+  for (ScratchFile& image_file : image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    CHECK(file.get() != nullptr);
+    ImageHeader image_header;
+    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    CHECK(image_header.IsValid());
+    ret.push_back(image_header.GetImageSize());
+  }
+  return ret;
+}
+
+inline void CompilationHelper::Compile(CompilerDriver* driver,
+                                       ImageHeader::StorageMode storage_mode) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
+
+  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      // Inject in boot class path so that the compiler driver can see it.
+      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
+    }
+    class_path.push_back(dex_file.get());
+  }
+
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_path) {
+    dex_file_locations.push_back(dex_file->GetLocation());
+    if (dex_file->IsReadOnly()) {
+      dex_file->EnableWrite();
+    }
+  }
+  {
+    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
+    ScratchFile location;
+    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
+      std::string cur_location =
+          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
+      image_locations.push_back(ScratchFile(cur_location));
+    }
+  }
+  std::vector<std::string> image_filenames;
+  for (ScratchFile& file : image_locations) {
+    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
+    image_filenames.push_back(image_filename);
+    size_t pos = image_filename.rfind('/');
+    CHECK_NE(pos, std::string::npos) << image_filename;
+    if (image_dir.empty()) {
+      image_dir = image_filename.substr(0, pos);
+      int mkdir_result = mkdir(image_dir.c_str(), 0700);
+      CHECK_EQ(0, mkdir_result) << image_dir;
+    }
+    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
+  }
+
+  std::vector<std::string> oat_filenames;
+  std::vector<std::string> vdex_filenames;
+  for (const std::string& image_filename : image_filenames) {
+    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
+    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
+    oat_filenames.push_back(oat_filename);
+    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
+    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
+    vdex_filenames.push_back(vdex_filename);
+  }
+
+  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
+  std::vector<const char*> oat_filename_vector;
+  for (const std::string& file : oat_filenames) {
+    oat_filename_vector.push_back(file.c_str());
+  }
+  std::vector<const char*> image_filename_vector;
+  for (const std::string& file : image_filenames) {
+    image_filename_vector.push_back(file.c_str());
+  }
+  size_t image_idx = 0;
+  for (const DexFile* dex_file : class_path) {
+    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
+    ++image_idx;
+  }
+  // TODO: compile_pic should be a test argument.
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
+                                                      kRequestedImageBase,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false,
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_index_map));
+  {
+    {
+      jobject class_loader = nullptr;
+      TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger::ScopedTiming t("CompileAll", &timings);
+      driver->SetDexFilesForOatFile(class_path);
+      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
+
+      t.NewTiming("WriteElf");
+      SafeMap<std::string, std::string> key_value_store;
+      std::vector<const char*> dex_filename_vector;
+      for (size_t i = 0; i < class_path.size(); ++i) {
+        dex_filename_vector.push_back("");
+      }
+      key_value_store.Put(OatHeader::kBootClassPathKey,
+                          gc::space::ImageSpace::GetMultiImageBootClassPath(
+                              dex_filename_vector,
+                              oat_filename_vector,
+                              image_filename_vector));
+
+      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
+      std::vector<std::unique_ptr<OatWriter>> oat_writers;
+      for (ScratchFile& oat_file : oat_files) {
+        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
+                                                      driver->GetInstructionSetFeatures(),
+                                                      &driver->GetCompilerOptions(),
+                                                      oat_file.GetFile()));
+        elf_writers.back()->Start();
+        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
+                                               &timings,
+                                               /*profile_compilation_info*/nullptr));
+      }
+
+      std::vector<OutputStream*> rodata;
+      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      // Now that we have finalized key_value_store_, start writing the oat file.
+      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
+        const DexFile* dex_file = class_path[i];
+        rodata.push_back(elf_writers[i]->StartRoData());
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
+                                            dex_file->GetLocation().c_str(),
+                                            dex_file->GetLocationChecksum());
+
+        std::unique_ptr<MemMap> cur_opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
+        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
+            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
+            rodata.back(),
+            driver->GetInstructionSet(),
+            driver->GetInstructionSetFeatures(),
+            &key_value_store,
+            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
+            /* update_input_vdex */ false,
+            &cur_opened_dex_files_map,
+            &cur_opened_dex_files);
+        ASSERT_TRUE(dex_files_ok);
+
+        if (cur_opened_dex_files_map != nullptr) {
+          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
+            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
+            opened_dex_files.push_back(std::move(cur_dex_file));
+          }
+        } else {
+          ASSERT_TRUE(cur_opened_dex_files.empty());
+        }
+      }
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
+      if (kIsVdexEnabled) {
+        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
+          std::unique_ptr<BufferedOutputStream> vdex_out(
+              MakeUnique<BufferedOutputStream>(
+                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
+          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
+        }
+      }
+
+      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
+        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
+                                                driver->GetInstructionSetFeatures());
+        OatWriter* const oat_writer = oat_writers[i].get();
+        ElfWriter* const elf_writer = elf_writers[i].get();
+        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
+        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+        oat_writer->PrepareLayout(&patcher);
+        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+        size_t text_size = oat_writer->GetOatSize() - rodata_size;
+        elf_writer->PrepareDynamicSection(rodata_size,
+                                          text_size,
+                                          oat_writer->GetBssSize(),
+                                          oat_writer->GetBssRootsOffset());
+
+        writer->UpdateOatFileLayout(i,
+                                    elf_writer->GetLoadedSize(),
+                                    oat_writer->GetOatDataOffset(),
+                                    oat_writer->GetOatSize());
+
+        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
+        ASSERT_TRUE(rodata_ok);
+        elf_writer->EndRoData(rodata[i]);
+
+        OutputStream* text = elf_writer->StartText();
+        bool text_ok = oat_writer->WriteCode(text);
+        ASSERT_TRUE(text_ok);
+        elf_writer->EndText(text);
+
+        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+        ASSERT_TRUE(header_ok);
+
+        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
+
+        elf_writer->WriteDynamicSection();
+        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+
+        bool success = elf_writer->End();
+        ASSERT_TRUE(success);
+      }
+    }
+
+    bool success_image = writer->Write(kInvalidFd,
+                                       image_filename_vector,
+                                       oat_filename_vector);
+    ASSERT_TRUE(success_image);
+
+    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
+      const char* oat_filename = oat_filenames[i].c_str();
+      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+      ASSERT_TRUE(oat_file != nullptr);
+      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
+                                            writer->GetOatDataBegin(i));
+      ASSERT_TRUE(success_fixup);
+      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
+                                                  << oat_filename;
+    }
+  }
+}
+
+inline void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
+                        CompilationHelper& helper,
+                        const std::string& extra_dex,
+                        const std::initializer_list<std::string>& image_classes) {
+  for (const std::string& image_class : image_classes) {
+    image_classes_.insert(image_class);
+  }
+  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+  // Set inline filter values.
+  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+  image_classes_.clear();
+  if (!extra_dex.empty()) {
+    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
+  }
+  helper.Compile(compiler_driver_.get(), storage_mode);
+  if (image_classes.begin() != image_classes.end()) {
+    // Make sure the class got initialized.
+    ScopedObjectAccess soa(Thread::Current());
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    for (const std::string& image_class : image_classes) {
+      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+      EXPECT_TRUE(klass != nullptr);
+      EXPECT_TRUE(klass->IsInitialized());
+    }
+  }
+}
+
+inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  CompilationHelper helper;
+  Compile(storage_mode, /*out*/ helper);
+  std::vector<uint64_t> image_file_sizes;
+  for (ScratchFile& image_file : helper.image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    ASSERT_TRUE(file.get() != nullptr);
+    ImageHeader image_header;
+    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    ASSERT_TRUE(image_header.IsValid());
+    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
+    ASSERT_NE(0U, bitmap_section.Size());
+
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    ASSERT_TRUE(heap->HaveContinuousSpaces());
+    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
+    ASSERT_FALSE(space->IsImageSpace());
+    ASSERT_TRUE(space != nullptr);
+    ASSERT_TRUE(space->IsMallocSpace());
+    image_file_sizes.push_back(file->GetLength());
+  }
+
+  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
+  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
+
+  // Need to delete the compiler since it has worker threads which are attached to runtime.
+  compiler_driver_.reset();
+
+  // Tear down old runtime before making a new one, clearing out misc state.
+
+  // Remove the reservation of the memory for use to load the image.
+  // Need to do this before we reset the runtime.
+  UnreserveImageSpace();
+
+  helper.extra_dex_files.clear();
+  runtime_.reset();
+  java_lang_dex_file_ = nullptr;
+
+  MemMap::Init();
+
+  RuntimeOptions options;
+  std::string image("-Ximage:");
+  image.append(helper.image_locations[0].GetFilename());
+  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
+  // By default the compiler this creates will not include patch information.
+  options.push_back(std::make_pair("-Xnorelocate", nullptr));
+
+  if (!Runtime::Create(options, false)) {
+    LOG(FATAL) << "Failed to create runtime";
+    return;
+  }
+  runtime_.reset(Runtime::Current());
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more managable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  ASSERT_TRUE(runtime_.get() != nullptr);
+  class_linker_ = runtime_->GetClassLinker();
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ASSERT_TRUE(heap->HasBootImageSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
+
+  // We loaded the runtime with an explicit image, so it must exist.
+  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
+  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
+    std::unique_ptr<const DexFile> dex(
+        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
+    ASSERT_TRUE(dex != nullptr);
+    uint64_t image_file_size = image_file_sizes[i];
+    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
+    ASSERT_TRUE(image_space != nullptr);
+    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+      // Uncompressed, image should be smaller than file.
+      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
+    } else if (image_file_size > 16 * KB) {
+      // Compressed, file should be smaller than image. Not really valid for small images.
+      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
+    }
+
+    image_space->VerifyImageAllocations();
+    uint8_t* image_begin = image_space->Begin();
+    uint8_t* image_end = image_space->End();
+    if (i == 0) {
+      // This check is only valid for image 0.
+      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
+    }
+    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
+      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
+      const char* descriptor = dex->GetClassDescriptor(class_def);
+      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
+      EXPECT_TRUE(klass != nullptr) << descriptor;
+      if (image_classes.find(descriptor) == image_classes.end()) {
+        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
+                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
+      } else {
+        // Image classes should be located inside the image.
+        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
+        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
+      }
+      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
+    }
+  }
+}
+
+
+}  // namespace art
+
+#endif  // ART_COMPILER_IMAGE_TEST_H_
diff --git a/compiler/image_write_read_test.cc b/compiler/image_write_read_test.cc
new file mode 100644
index 0000000..32c0b06
--- /dev/null
+++ b/compiler/image_write_read_test.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "image_test.h"
+
+namespace art {
+
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
+TEST_F(ImageTest, WriteReadLZ4HC) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
+}
+
+}  // namespace art
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 2cb1b6c..47f840f 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -43,10 +43,11 @@
   enum class ThunkType {
     kMethodCall,              // Method call thunk.
     kBakerReadBarrierField,   // Baker read barrier, load field or array element at known offset.
+    kBakerReadBarrierArray,   // Baker read barrier, array load with index in register.
     kBakerReadBarrierRoot,    // Baker read barrier, GC root load.
   };
 
-  struct BakerReadBarrierOffsetParams {
+  struct BakerReadBarrierFieldParams {
     uint32_t holder_reg;      // Holder object for reading lock word.
     uint32_t base_reg;        // Base register, different from holder for large offset.
                               // If base differs from holder, it should be a pre-defined
@@ -54,9 +55,16 @@
                               // The offset is retrieved using introspection.
   };
 
+  struct BakerReadBarrierArrayParams {
+    uint32_t base_reg;        // Reference to the start of the data.
+    uint32_t dummy;           // Dummy field.
+                              // The index register is retrieved using introspection
+                              // to limit the number of thunks we need to emit.
+  };
+
   struct BakerReadBarrierRootParams {
     uint32_t root_reg;        // The register holding the GC root.
-    uint32_t dummy;
+    uint32_t dummy;           // Dummy field.
   };
 
   struct RawThunkParams {
@@ -66,8 +74,12 @@
 
   union ThunkParams {
     RawThunkParams raw_params;
-    BakerReadBarrierOffsetParams offset_params;
+    BakerReadBarrierFieldParams field_params;
+    BakerReadBarrierArrayParams array_params;
     BakerReadBarrierRootParams root_params;
+    static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check");
+    static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check");
+    static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check");
   };
 
   class ThunkKey {
@@ -78,9 +90,14 @@
       return type_;
     }
 
-    BakerReadBarrierOffsetParams GetOffsetParams() const {
+    BakerReadBarrierFieldParams GetFieldParams() const {
       DCHECK(type_ == ThunkType::kBakerReadBarrierField);
-      return params_.offset_params;
+      return params_.field_params;
+    }
+
+    BakerReadBarrierArrayParams GetArrayParams() const {
+      DCHECK(type_ == ThunkType::kBakerReadBarrierArray);
+      return params_.array_params;
     }
 
     BakerReadBarrierRootParams GetRootParams() const {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 551c73b..5c6fb50 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -29,6 +29,7 @@
 #include "mirror/array-inl.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
+#include "read_barrier.h"
 #include "utils/arm64/assembler_arm64.h"
 
 namespace art {
@@ -313,7 +314,17 @@
         uint32_t next_insn = GetInsn(code, literal_offset + 4u);
         // LDR (immediate) with correct base_reg.
         CheckValidReg(next_insn & 0x1fu);  // Check destination register.
-        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+        CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5));
+        break;
+      }
+      case ThunkType::kBakerReadBarrierArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+        // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+        // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+        CheckValidReg(next_insn & 0x1fu);  // Check destination register.
+        CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5));
+        CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
         break;
       }
       case ThunkType::kBakerReadBarrierRoot: {
@@ -344,10 +355,16 @@
   ThunkParams params;
   switch (type) {
     case BakerReadBarrierKind::kField:
-      params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
-      CheckValidReg(params.offset_params.base_reg);
-      params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
-      CheckValidReg(params.offset_params.holder_reg);
+      params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+      CheckValidReg(params.field_params.base_reg);
+      params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
+      CheckValidReg(params.field_params.holder_reg);
+      break;
+    case BakerReadBarrierKind::kArray:
+      params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+      CheckValidReg(params.array_params.base_reg);
+      params.array_params.dummy = 0u;
+      DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
       break;
     case BakerReadBarrierKind::kGcRoot:
       params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
@@ -363,6 +380,9 @@
   static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
                 static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
                 "Thunk type translation check.");
+  static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset ==
+                static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray),
+                "Thunk type translation check.");
   static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
                 static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
                 "Thunk type translation check.");
@@ -394,7 +414,7 @@
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
   // a memory barrier (which would be more expensive).
-  __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+  __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
   __ Br(lr);          // And return back to the function.
   // Note: The fake dependency is unnecessary for the slow path.
 }
@@ -419,8 +439,8 @@
       // and return to the LDR instruction to load the reference. Otherwise, use introspection
       // to load the reference and call the entrypoint (in IP1) that performs further checks
       // on the reference and marks it if needed.
-      auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
-      auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+      auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg);
+      auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg);
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
       // If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,11 +464,31 @@
         // Add null check slow path. The stack map is at the address pointed to by LR.
         __ Bind(&throw_npe);
         int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
-        __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+        __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
         __ Br(ip0);
       }
       break;
     }
+    case ThunkType::kBakerReadBarrierArray: {
+      auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg);
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip0, ip1);
+      vixl::aarch64::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffset(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+      __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
+      __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
+      __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
+                                            // a switch case target based on the index register.
+      __ Mov(ip0, base_reg);                // Move the base register to ip0.
+      __ Br(ip1);                           // Jump to the entrypoint's array switch case.
+      break;
+    }
     case ThunkType::kBakerReadBarrierRoot: {
       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
       // and it does not have a forwarding address), call the correct introspection entrypoint;
@@ -494,6 +534,7 @@
     case ThunkType::kMethodCall:
       return kMaxMethodCallPositiveDisplacement;
     case ThunkType::kBakerReadBarrierField:
+    case ThunkType::kBakerReadBarrierArray:
     case ThunkType::kBakerReadBarrierRoot:
       return kMaxBcondPositiveDisplacement;
   }
@@ -504,6 +545,7 @@
     case ThunkType::kMethodCall:
       return kMaxMethodCallNegativeDisplacement;
     case ThunkType::kBakerReadBarrierField:
+    case ThunkType::kBakerReadBarrierArray:
     case ThunkType::kBakerReadBarrierRoot:
       return kMaxBcondNegativeDisplacement;
   }
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index 7887cea..71ab70e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -19,6 +19,7 @@
 
 #include "base/array_ref.h"
 #include "base/bit_field.h"
+#include "base/bit_utils.h"
 #include "linker/arm/relative_patcher_arm_base.h"
 
 namespace art {
@@ -28,6 +29,7 @@
  public:
   enum class BakerReadBarrierKind : uint8_t {
     kField,   // Field get or array get with constant offset (i.e. constant index).
+    kArray,   // Array get with index in register.
     kGcRoot,  // GC root load.
     kLast
   };
@@ -40,6 +42,13 @@
            BakerReadBarrierSecondRegField::Encode(holder_reg);
   }
 
+  static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+    CheckValidReg(base_reg);
+    return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+           BakerReadBarrierFirstRegField::Encode(base_reg) |
+           BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+  }
+
   static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
     CheckValidReg(root_reg);
     return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
@@ -68,14 +77,14 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  protected:
-  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
-
   ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
   std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
   uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
   uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
 
  private:
+  static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
   static constexpr size_t kBitsForBakerReadBarrierKind =
       MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
   static constexpr size_t kBitsForRegister = 5u;
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b4d35ab..57ea886 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -18,6 +18,7 @@
 #include "linker/relative_patcher_test.h"
 #include "linker/arm64/relative_patcher_arm64.h"
 #include "lock_word.h"
+#include "mirror/array-inl.h"
 #include "mirror/object.h"
 #include "oat_quick_method_header.h"
 
@@ -46,9 +47,15 @@
   static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
   static constexpr uint32_t kBlMinusMax = 0x96000000u;
 
-  // LDR immediate, unsigned offset.
+  // LDR immediate, 32-bit, unsigned offset.
   static constexpr uint32_t kLdrWInsn = 0xb9400000u;
 
+  // LDR register, 32-bit, LSL #2.
+  static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u;
+
+  // LDUR, 32-bit.
+  static constexpr uint32_t kLdurWInsn = 0xb8400000u;
+
   // ADD/ADDS/SUB/SUBS immediate, 64-bit.
   static constexpr uint32_t kAddXInsn = 0x91000000u;
   static constexpr uint32_t kAddsXInsn = 0xb1000000u;
@@ -68,7 +75,7 @@
   static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
 
   // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes.
-  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011;
+  static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
 
   void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
     CHECK_LE(pos, code->size());
@@ -188,7 +195,7 @@
 
   std::vector<uint8_t> GenNops(size_t num_nops) {
     std::vector<uint8_t> result;
-    result.reserve(num_nops * 4u + 4u);
+    result.reserve(num_nops * 4u);
     for (size_t i = 0; i != num_nops; ++i) {
       PushBackInsn(&result, kNopInsn);
     }
@@ -228,7 +235,7 @@
     } else {
       LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn;
     }
-    uint32_t adrp = 0x90000000 |              // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
+    uint32_t adrp = 0x90000000u |             // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64)
         ((disp & 0x3000u) << (29 - 12)) |     // immlo = ((disp & 0x3000u) >> 12) is at bit 29,
         ((disp & 0xffffc000) >> (14 - 5)) |   // immhi = (disp >> 14) is at bit 5,
         // We take the sign bit from the disp, limiting disp to +- 2GiB.
@@ -471,6 +478,14 @@
     return patcher->CompileThunk(key);
   }
 
+  std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+    LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+        0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+    auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
+    ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+    return patcher->CompileThunk(key);
+  }
+
   std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
     LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
         0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
@@ -488,7 +503,7 @@
            (static_cast<uint32_t>(output_[offset + 3]) << 24);
   }
 
-  void TestBakerField(uint32_t offset, uint32_t root_reg);
+  void TestBakerField(uint32_t offset, uint32_t ref_reg);
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -885,7 +900,7 @@
 
 TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
 
-void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -899,7 +914,7 @@
   uint32_t method_idx = 0u;
   for (uint32_t base_reg : valid_regs) {
     for (uint32_t holder_reg : valid_regs) {
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
       ASSERT_EQ(kMethodCodeSize, raw_code.size());
       ArrayRef<const uint8_t> code(raw_code);
@@ -922,7 +937,7 @@
       ++method_idx;
       uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
       uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
-      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+      uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg;
       const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
       ASSERT_EQ(kMethodCodeSize, expected_code.size());
       ASSERT_TRUE(
@@ -942,7 +957,7 @@
       if (holder_reg == base_reg) {
         // Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
         ASSERT_GE(output_.size() - gray_check_offset, 4u);
-        ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+        ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
         gray_check_offset +=4u;
       }
       // Verify that the lock word for gray bit check is loaded from the holder address.
@@ -955,12 +970,12 @@
           /* ip0 */ 16;
       EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
       // Verify the gray bit check.
-      const uint32_t check_gray_bit_witout_offset =
-          0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
-      EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+      const uint32_t check_gray_bit_without_offset =
+          0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+      EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu);
       // Verify the fake dependency.
       const uint32_t fake_dependency =
-          0x8b408000 |              // ADD Xd, Xn, Xm, LSR 32
+          0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
           (/* ip0 */ 16 << 16) |    // Xm = ip0
           (base_reg << 5) |         // Xn = base_reg
           base_reg;                 // Xd = base_reg
@@ -973,19 +988,19 @@
   }
 }
 
-#define TEST_BAKER_FIELD(offset, root_reg)    \
+#define TEST_BAKER_FIELD(offset, ref_reg)     \
   TEST_F(Arm64RelativePatcherTestDefault,     \
-    BakerOffset##offset##_##root_reg) {       \
-    TestBakerField(offset, root_reg);         \
+    BakerOffset##offset##_##ref_reg) {        \
+    TestBakerField(offset, ref_reg);          \
   }
 
-TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
-TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29)
 
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
   // One thunk in the middle with maximum distance branches to it from both sides.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1046,7 +1061,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
   // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
   // earlier, so the thunk is emitted before the filler.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 0;
   const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1076,7 +1091,7 @@
 TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
   // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
   // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
-  // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+  // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
   constexpr uint32_t kLiteralOffset1 = 4;
   const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
   ArrayRef<const uint8_t> code1(raw_code1);
@@ -1132,7 +1147,88 @@
   ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
 }
 
-TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) {
+TEST_F(Arm64RelativePatcherTestDefault, BakerArray) {
+  uint32_t valid_regs[] = {
+      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
+      20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+      // LR and SP/ZR are reserved.
+  };
+  auto ldr = [](uint32_t base_reg) {
+    uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+    uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+    return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg;
+  };
+  constexpr size_t kMethodCodeSize = 8u;
+  constexpr size_t kLiteralOffset = 0u;
+  uint32_t method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, raw_code.size());
+    ArrayRef<const uint8_t> code(raw_code);
+    const LinkerPatch patches[] = {
+        LinkerPatch::BakerReadBarrierBranchPatch(
+            kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+    };
+    AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+  }
+  Link();
+
+  // All thunks are at the end.
+  uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+  method_idx = 0u;
+  for (uint32_t base_reg : valid_regs) {
+    ++method_idx;
+    uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+    uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+    const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)});
+    ASSERT_EQ(kMethodCodeSize, expected_code.size());
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+    std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+    ASSERT_GT(output_.size(), thunk_offset);
+    ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+    ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+                                           expected_thunk.size());
+    if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+      DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+      ASSERT_TRUE(false);
+    }
+
+    // Verify that the lock word for gray bit check is loaded from the correct address
+    // before the base_reg which points to the array data.
+    static constexpr size_t kGrayCheckInsns = 5;
+    ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns);
+    int32_t data_offset =
+        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+    int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+    ASSERT_LT(offset, 0);
+    const uint32_t load_lock_word =
+        kLdurWInsn |
+        ((offset & 0x1ffu) << 12) |
+        (base_reg << 5) |
+        /* ip0 */ 16;
+    EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset));
+    // Verify the gray bit check.
+    const uint32_t check_gray_bit_without_offset =
+        0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+    EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu);
+    // Verify the fake dependency.
+    const uint32_t fake_dependency =
+        0x8b408000u |             // ADD Xd, Xn, Xm, LSR 32
+        (/* ip0 */ 16 << 16) |    // Xm = ip0
+        (base_reg << 5) |         // Xn = base_reg
+        base_reg;                 // Xd = base_reg
+    EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u));
+    // Do not check the rest of the implementation.
+
+    // The next thunk follows on the next aligned offset.
+    thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+  }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
   uint32_t valid_regs[] = {
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,         18, 19,  // IP0 and IP1 are reserved.
@@ -1180,7 +1276,7 @@
 
     // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
     ASSERT_GE(output_.size() - thunk_offset, 4u);
-    ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+    ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu);
     // Do not check the rest of the implementation.
 
     // The next thunk follows on the next aligned offset.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577..ebd578c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3067,6 +3067,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4629c54..78b627a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -91,6 +91,7 @@
 
 // Flags controlling the use of link-time generated thunks for Baker read barriers.
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
 constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
 
 // Some instructions have special requirements for a temporary, for example
@@ -2759,6 +2760,7 @@
     // Object ArrayGet with Baker's read barrier case.
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+    DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
     if (index.IsConstant()) {
       // Array load with a constant index can be treated as a field load.
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2769,12 +2771,12 @@
                                                       obj.W(),
                                                       offset,
                                                       maybe_temp,
-                                                      /* needs_null_check */ true,
+                                                      /* needs_null_check */ false,
                                                       /* use_load_acquire */ false);
     } else {
       Register temp = WRegisterFrom(locations->GetTemp(0));
       codegen_->GenerateArrayLoadWithBakerReadBarrier(
-          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+          instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
     }
   } else {
     // General case.
@@ -5477,6 +5479,15 @@
   }
 }
 
+void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -5928,9 +5939,9 @@
           !Runtime::Current()->UseJitCompilation()) {
         // Note that we do not actually check the value of `GetIsGcMarking()`
         // to decide whether to mark the loaded GC root or not.  Instead, we
-        // load into `temp` the read barrier mark introspection entrypoint.
-        // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-        // vice versa.
+        // load into `temp` (actually IP1) the read barrier mark introspection
+        // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+        // false, and vice versa.
         //
         // We use link-time generated thunks for the slow path. That thunk
         // checks the reference and jumps to the entrypoint if needed.
@@ -6054,24 +6065,24 @@
       !use_load_acquire &&
       !Runtime::Current()->UseJitCompilation()) {
     // Note that we do not actually check the value of `GetIsGcMarking()`
-    // to decide whether to mark the loaded GC root or not.  Instead, we
-    // load into `temp` the read barrier mark introspection entrypoint.
-    // If `temp` is null, it means that `GetIsGcMarking()` is false, and
-    // vice versa.
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
     //
     // We use link-time generated thunks for the slow path. That thunk checks
     // the holder and jumps to the entrypoint if needed. If the holder is not
     // gray, it creates a fake dependency and returns to the LDR instruction.
     //
     //     temp = Thread::Current()->pReadBarrierMarkIntrospection
-    //     lr = &return_address;
+    //     lr = &gray_return_address;
     //     if (temp != nullptr) {
     //        goto field_thunk<holder_reg, base_reg>(lr)
     //     }
     //   not_gray_return_address:
     //     // Original reference load. If the offset is too large to fit
     //     // into LDR, we use an adjusted base register here.
-    //     GcRoot<mirror::Object> root = *(obj+offset);
+    //     GcRoot<mirror::Object> reference = *(obj+offset);
     //   gray_return_address:
 
     DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6141,16 +6152,74 @@
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+      !Runtime::Current()->UseJitCompilation()) {
+    // Note that we do not actually check the value of `GetIsGcMarking()`
+    // to decide whether to mark the loaded reference or not.  Instead, we
+    // load into `temp` (actually IP1) the read barrier mark introspection
+    // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+    // false, and vice versa.
+    //
+    // We use link-time generated thunks for the slow path. That thunk checks
+    // the holder and jumps to the entrypoint if needed. If the holder is not
+    // gray, it creates a fake dependency and returns to the LDR instruction.
+    //
+    //     temp = Thread::Current()->pReadBarrierMarkIntrospection
+    //     lr = &gray_return_address;
+    //     if (temp != nullptr) {
+    //        goto field_thunk<holder_reg, base_reg>(lr)
+    //     }
+    //   not_gray_return_address:
+    //     // Original reference load. If the offset is too large to fit
+    //     // into LDR, we use an adjusted base register here.
+    //     GcRoot<mirror::Object> reference = data[index];
+    //   gray_return_address:
+
+    DCHECK(index.IsValid());
+    Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+    Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    DCHECK(temps.IsAvailable(ip0));
+    DCHECK(temps.IsAvailable(ip1));
+    temps.Exclude(ip0, ip1);
+    uint32_t custom_data =
+        linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+    // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+    DCHECK_EQ(ip0.GetCode(), 16u);
+    const int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+    __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+    __ Add(temp.X(), obj.X(), Operand(data_offset));
+    EmissionCheckScope guard(GetVIXLAssembler(),
+                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+    vixl::aarch64::Label return_address;
+    __ adr(lr, &return_address);
+    __ Bind(cbnz_label);
+    __ cbnz(ip1, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+    static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+                  "Array LDR must be 1 instruction (4B) before the return address label; "
+                  " 2 instructions (8B) for heap poisoning.");
+    __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+    GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+    __ Bind(&return_address);
+    return;
+  }
+
   // Array cells are never volatile variables, therefore array loads
   // never use Load-Acquire instructions on ARM64.
   const bool use_load_acquire = false;
 
-  static_assert(
-      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
-      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   // /* HeapReference<Object> */ ref =
   //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
-  size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
   GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                             ref,
                                             obj,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 332ab49..3ded3e4 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,12 +318,13 @@
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
-  vixl::aarch64::MemOperand CreateVecMemRegisters(
+  vixl::aarch64::MemOperand VecAddress(
       HVecMemoryOperation* instruction,
-      Location* reg_loc,
-      bool is_load,
       // This function may acquire a scratch register.
-      vixl::aarch64::UseScratchRegisterScope* temps_scope);
+      vixl::aarch64::UseScratchRegisterScope* temps_scope,
+      size_t size,
+      bool is_string_char_at,
+      /*out*/ vixl::aarch64::Register* scratch);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b6678b0..8744cc8 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3103,6 +3103,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index aa030b2..9736626 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -7766,6 +7766,15 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 3875c4b..03939e3 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -229,9 +229,10 @@
   // We switch to the table-based method starting with 7 cases.
   static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 19250c6..6f37ed4 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -5653,6 +5653,15 @@
   }
 }
 
+void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index fd1a174..200e884 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -226,9 +226,10 @@
   // We switch to the table-based method starting with 7 cases.
   static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
   void HandleCondition(HCondition* instruction);
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 93befa4..57f7e6b 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,7 @@
 namespace art {
 namespace arm64 {
 
+using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
@@ -771,20 +772,22 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+// Helper to set up locations for vector memory operations. Returns the memory operand and,
+// if used, sets the output parameter scratch to a temporary register used in this operand,
+// so that the client can release it right after the memory operand use.
+MemOperand InstructionCodeGeneratorARM64::VecAddress(
     HVecMemoryOperation* instruction,
-    Location* reg_loc,
-    bool is_load,
-    UseScratchRegisterScope* temps_scope) {
+    UseScratchRegisterScope* temps_scope,
+    size_t size,
+    bool is_string_char_at,
+    /*out*/ Register* scratch) {
   LocationSummary* locations = instruction->GetLocations();
   Register base = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-
-  Primitive::Type packed_type = instruction->GetPackedType();
-  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
-  size_t shift = Primitive::ComponentSizeShift(packed_type);
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
+  size_t shift = ComponentSizeShiftWidth(size);
 
   // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
@@ -793,10 +796,9 @@
     offset += Int64ConstantFrom(index) << shift;
     return HeapOperand(base, offset);
   } else {
-    Register temp = temps_scope->AcquireSameSizeAs(base);
-    __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift));
-
-    return HeapOperand(temp, offset);
+    *scratch = temps_scope->AcquireSameSizeAs(base);
+    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+    return HeapOperand(*scratch, offset);
   }
 }
 
@@ -805,15 +807,43 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->Out());
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        vixl::aarch64::Label uncompressed_load, done;
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        Register length = temps.AcquireW();
+        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
+        __ Tbnz(length.W(), 0, &uncompressed_load);
+        temps.Release(length);  // no longer needed
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
+               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+        __ Uxtl(reg.V8H(), reg.V8B());
+        __ B(&done);
+        if (scratch.IsValid()) {
+          temps.Release(scratch);  // if used, no longer needed
+        }
+        // Load 8 direct uncompressed chars.
+        __ Bind(&uncompressed_load);
+        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimFloat:
@@ -821,7 +851,7 @@
     case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Ldr(reg, mem);
+      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -834,10 +864,11 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VRegister reg = VRegisterFrom(locations->InAt(2));
   UseScratchRegisterScope temps(GetVIXLAssembler());
-  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps);
-  VRegister reg = VRegisterFrom(reg_loc);
+  Register scratch;
 
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
@@ -850,7 +881,7 @@
     case Primitive::kPrimDouble:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Str(reg, mem);
+      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 013b092..5bb19c1 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -201,6 +201,7 @@
 
 void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -766,16 +767,10 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -783,22 +778,53 @@
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
 }
 
 void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 4 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -825,9 +851,10 @@
 }
 
 void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 66f19a4..6d4aae8 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -194,6 +194,7 @@
 
 void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Integral-abs requires a temporary for the comparison.
   if (instruction->GetPackedType() == Primitive::kPrimInt) {
     instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
   }
@@ -755,16 +756,10 @@
   }
 }
 
-// Helper to set up registers and address for vector memory operations.
-static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
-                                     Location* reg_loc,
-                                     bool is_load) {
-  LocationSummary* locations = instruction->GetLocations();
+// Helper to construct address for vector memory operations.
+static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) {
   Location base = locations->InAt(0);
   Location index = locations->InAt(1);
-  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
-  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
-  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
   ScaleFactor scale = TIMES_1;
   switch (size) {
     case 2: scale = TIMES_2; break;
@@ -772,22 +767,53 @@
     case 8: scale = TIMES_8; break;
     default: break;
   }
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
   return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
 }
 
 void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+  // String load requires a temporary for the compressed load.
+  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, instruction->IsStringCharAt());
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      // Special handling of compressed/uncompressed string load.
+      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        NearLabel done, not_compressed;
+        XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+        // Test compression bit.
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
+        __ j(kNotZero, &not_compressed);
+        // Zero extend 8 compressed bytes into 8 chars.
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+        __ pxor(tmp, tmp);
+        __ punpcklbw(reg, tmp);
+        __ jmp(&done);
+        // Load 8 direct uncompressed chars.
+        __ Bind(&not_compressed);
+        is_aligned16 ?  __ movdqa(reg, address) :  __ movdqu(reg, address);
+        __ Bind(&done);
+        return;
+      }
+      FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
@@ -814,9 +840,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
-  Location reg_loc = Location::NoLocation();
-  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
-  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  Address address = VecAddress(locations, size, /*is_string_char_at*/ false);
+  XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 08a752f..1e867dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2057,6 +2057,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff6e099..f413739 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2165,6 +2165,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 34b52a8..6a14045 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -341,7 +341,12 @@
     const HInstructionList& list = input->IsPhi()
         ? input->GetBlock()->GetPhis()
         : input->GetBlock()->GetInstructions();
-    if (!list.Contains(input)) {
+    if (input->GetBlock() == nullptr) {
+      AddError(StringPrintf("Input %d of instruction %d is not in any "
+                            "basic block of the control-flow graph.",
+                            input->GetId(),
+                            instruction->GetId()));
+    } else if (!list.Contains(input)) {
       AddError(StringPrintf("Input %d of instruction %d is not defined "
                             "in a basic block of the control-flow graph.",
                             input->GetId(),
@@ -497,8 +502,7 @@
                             "has a null pointer as last input.",
                             invoke->DebugName(),
                             invoke->GetId()));
-    }
-    if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
+    } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) {
       AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check "
                             "has a last instruction (%s:%d) which is neither a clinit check "
                             "nor a load class instruction.",
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 72cbd18..3b681c1 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -140,6 +140,14 @@
   DCHECK_NE(total_number_of_instructions_, 0u);
   DCHECK_NE(inlining_budget_, 0u);
 
+  // If we're compiling with a core image (which is only used for
+  // test purposes), honor inlining directives in method names:
+  // - if a method's name contains the substring "$inline$", ensure
+  //   that this method is actually inlined;
+  // - if a method's name contains the substring "$noinline$", do not
+  //   inline that method.
+  const bool honor_inlining_directives = IsCompilingWithCoreImage();
+
   // Keep a copy of all blocks when starting the visit.
   ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
   DCHECK(!blocks.empty());
@@ -152,7 +160,7 @@
       HInvoke* call = instruction->AsInvoke();
       // As long as the call is not intrinsified, it is worth trying to inline.
       if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
-        if (kIsDebugBuild && IsCompilingWithCoreImage()) {
+        if (honor_inlining_directives) {
           // Debugging case: directives in method names control or assert on inlining.
           std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
               call->GetDexMethodIndex(), /* with_signature */ false);
@@ -1472,8 +1480,13 @@
         }
       }
       if (needs_constructor_barrier) {
-        HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
-        invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+        // See CompilerDriver::RequiresConstructorBarrier for more details.
+        DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
+
+        HConstructorFence* constructor_fence =
+            new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena());
+        invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence,
+                                                                invoke_instruction);
       }
       *return_replacement = nullptr;
       break;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978c6a2..8b79da8 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -451,10 +451,13 @@
                                                               referrer_method_id.class_idx_,
                                                               parameter_index++,
                                                               Primitive::kPrimNot,
-                                                              true);
+                                                              /* is_this */ true);
     AppendInstruction(parameter);
     UpdateLocal(locals_index++, parameter);
     number_of_parameters--;
+    current_this_parameter_ = parameter;
+  } else {
+    DCHECK(current_this_parameter_ == nullptr);
   }
 
   const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
@@ -465,7 +468,7 @@
         arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
         parameter_index++,
         Primitive::GetType(shorty[shorty_pos]),
-        false);
+        /* is_this */ false);
     ++shorty_pos;
     AppendInstruction(parameter);
     // Store the parameter value in the local that the dex code will use
@@ -588,6 +591,8 @@
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
+// Does the method being compiled need any constructor barriers being inserted?
+// (Always 'false' for methods that aren't <init>.)
 static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
   // Can be null in unit tests only.
   if (UNLIKELY(cu == nullptr)) {
@@ -596,6 +601,11 @@
 
   Thread* self = Thread::Current();
   return cu->IsConstructor()
+      && !cu->IsStatic()
+      // RequiresConstructorBarrier must only be queried for <init> methods;
+      // it's effectively "false" for every other method.
+      //
+      // See CompilerDriver::RequiresConstructBarrier for more explanation.
       && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
 }
 
@@ -639,13 +649,24 @@
                                       Primitive::Type type,
                                       uint32_t dex_pc) {
   if (type == Primitive::kPrimVoid) {
+    // Only <init> (which is a return-void) could possibly have a constructor fence.
     // This may insert additional redundant constructor fences from the super constructors.
     // TODO: remove redundant constructor fences (b/36656456).
     if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
-      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+      // Compiling instance constructor.
+      if (kIsDebugBuild) {
+        std::string method_name = graph_->GetMethodName();
+        CHECK_EQ(std::string("<init>"), method_name);
+      }
+
+      HInstruction* fence_target = current_this_parameter_;
+      DCHECK(fence_target != nullptr);
+
+      AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
+    DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
     HInstruction* value = LoadLocal(instruction.VRegA(), type);
     AppendInstruction(new (arena_) HReturn(value, dex_pc));
   }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 7fdc188..2fb5c7b 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -62,6 +62,7 @@
         current_block_(nullptr),
         current_locals_(nullptr),
         latest_result_(nullptr),
+        current_this_parameter_(nullptr),
         compiler_driver_(driver),
         code_generator_(code_generator),
         dex_compilation_unit_(dex_compilation_unit),
@@ -325,6 +326,11 @@
   HBasicBlock* current_block_;
   ArenaVector<HInstruction*>* current_locals_;
   HInstruction* latest_result_;
+  // Current "this" parameter.
+  // Valid only after InitializeParameters() finishes.
+  // * Null for static methods.
+  // * Non-null for instance methods.
+  HParameterValue* current_this_parameter_;
 
   CompilerDriver* const compiler_driver_;
 
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 7d1f146..c39e5f4 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -247,6 +247,7 @@
       access->GetType() == Primitive::kPrimNot) {
     // For object arrays, the read barrier instrumentation requires
     // the original array pointer.
+    // TODO: This can be relaxed for Baker CC.
     return false;
   }
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 41df56b..bfe04f5 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -32,7 +32,7 @@
 namespace mips {
 
 IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen)
-  : arena_(codegen->GetGraph()->GetArena()) {
+  : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
 }
 
 MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() {
@@ -3133,6 +3133,89 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+// long java.lang.Integer.valueOf(long)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  MipsAssembler* assembler = GetAssembler();
+  InstructionCodeGeneratorMIPS* icodegen =
+      down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor());
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadConst32(out, address);
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    MipsLabel allocate, done;
+    int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+    // Is (info.low <= in) && (in <= info.high)?
+    __ Addiu32(out, in, -info.low);
+    // As unsigned quantities is out < (info.high - info.low + 1)?
+    if (IsInt<16>(count)) {
+      __ Sltiu(AT, out, count);
+    } else {
+      __ LoadConst32(AT, count);
+      __ Sltu(AT, out, AT);
+    }
+    // Branch if out >= (info.high - info.low + 1).
+    // This means that "in" is outside of the range [info.low, info.high].
+    __ Beqz(AT, &allocate);
+
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadConst32(TMP, data_offset + address);
+    __ ShiftAndAdd(out, out, TMP, TIMES_4);
+    __ Lw(out, out, 0);
+    __ MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadConst32(calling_convention.GetRegisterAt(0), address);
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
@@ -3162,8 +3245,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
-UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
-
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index e134cb8..eaadad2 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -49,6 +49,7 @@
   bool TryDispatch(HInvoke* invoke);
 
  private:
+  CodeGeneratorMIPS* codegen_;
   ArenaAllocator* arena_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index b57b41f..c5e1160 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -32,7 +32,7 @@
 namespace mips64 {
 
 IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen)
-  : arena_(codegen->GetGraph()->GetArena()) {
+  : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) {
 }
 
 Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() {
@@ -2564,6 +2564,84 @@
   GenFPToFPCall(invoke, codegen_, kQuickTanh);
 }
 
+// long java.lang.Integer.valueOf(long)
+void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  Mips64Assembler* assembler = GetAssembler();
+  InstructionCodeGeneratorMIPS64* icodegen =
+      down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor());
+
+  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadConst64(out, address);
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+    Mips64Label allocate, done;
+    int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
+
+    // Is (info.low <= in) && (in <= info.high)?
+    __ Addiu32(out, in, -info.low);
+    // As unsigned quantities is out < (info.high - info.low + 1)?
+    __ LoadConst32(AT, count);
+    // Branch if out >= (info.high - info.low + 1).
+    // This means that "in" is outside of the range [info.low, info.high].
+    __ Bgeuc(out, AT, &allocate);
+
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadConst64(TMP, data_offset + address);
+    __ Dlsa(out, out, TMP, TIMES_4);
+    __ Lwu(out, out, 0);
+    __ MaybeUnpoisonHeapReference(out);
+    __ Bc(&done);
+
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadConst64(calling_convention.GetRegisterAt(0), address);
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 
@@ -2583,8 +2661,6 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
-UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
-
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 5b95c26..179627a 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -49,6 +49,7 @@
   bool TryDispatch(HInvoke* invoke);
 
  private:
+  CodeGeneratorMIPS64* codegen_;
   ArenaAllocator* arena_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 48699b3..8d8cc93 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -566,14 +566,22 @@
       store->GetBlock()->RemoveInstruction(store);
     }
 
-    // Eliminate allocations that are not used.
+    // Eliminate singleton-classified instructions:
+    //   * - Constructor fences (they never escape this thread).
+    //   * - Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
+      HConstructorFence::RemoveConstructorFences(new_instance);
+
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
+      // TODO: Delete constructor fences for new-array
+      // In the future HNewArray instructions will have HConstructorFence's for them.
+      // HConstructorFence::RemoveConstructorFences(new_array);
+
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
         new_array->GetBlock()->RemoveInstruction(new_array);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8f52812..bbc55dd 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -733,12 +733,6 @@
     }
     return true;
   } else if (instruction->IsArrayGet()) {
-    // Strings are different, with a different offset to the actual data
-    // and some compressed to save memory. For now, all cases are rejected
-    // to avoid the complexity.
-    if (instruction->AsArrayGet()->IsStringCharAt()) {
-      return false;
-    }
     // Accept a right-hand-side array base[index] for
     // (1) exact matching vector type,
     // (2) loop-invariant base,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca953a1..f250c1a 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -528,6 +528,15 @@
   return cached_current_method_;
 }
 
+const char* HGraph::GetMethodName() const {
+  const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+  return dex_file_.GetMethodName(method_id);
+}
+
+std::string HGraph::PrettyMethod(bool with_signature) const {
+  return dex_file_.PrettyMethod(method_idx_, with_signature);
+}
+
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -1150,6 +1159,81 @@
   }
 }
 
+void HVariableInputSizeInstruction::RemoveAllInputs() {
+  RemoveAsUserOfAllInputs();
+  DCHECK(!HasNonEnvironmentUses());
+
+  inputs_.clear();
+  DCHECK_EQ(0u, InputCount());
+}
+
+void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+  DCHECK(instruction->GetBlock() != nullptr);
+  // Removing constructor fences only makes sense for instructions with an object return type.
+  DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
+
+  // Efficient implementation that simultaneously (in one pass):
+  // * Scans the uses list for all constructor fences.
+  // * Deletes that constructor fence from the uses list of `instruction`.
+  // * Deletes `instruction` from the constructor fence's inputs.
+  // * Deletes the constructor fence if it now has 0 inputs.
+
+  const HUseList<HInstruction*>& uses = instruction->GetUses();
+  // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt.
+  for (auto it = uses.begin(), end = uses.end(); it != end; ) {
+    const HUseListNode<HInstruction*>& use_node = *it;
+    HInstruction* const use_instruction = use_node.GetUser();
+
+    // Advance the iterator immediately once we fetch the use_node.
+    // Warning: If the input is removed, the current iterator becomes invalid.
+    ++it;
+
+    if (use_instruction->IsConstructorFence()) {
+      HConstructorFence* ctor_fence = use_instruction->AsConstructorFence();
+      size_t input_index = use_node.GetIndex();
+
+      // Process the candidate instruction for removal
+      // from the graph.
+
+      // Constructor fence instructions are never
+      // used by other instructions.
+      //
+      // If we wanted to make this more generic, it
+      // could be a runtime if statement.
+      DCHECK(!ctor_fence->HasUses());
+
+      // A constructor fence's return type is "kPrimVoid"
+      // and therefore it can't have any environment uses.
+      DCHECK(!ctor_fence->HasEnvironmentUses());
+
+      // Remove the inputs first, otherwise removing the instruction
+      // will try to remove its uses while we are already removing uses
+      // and this operation will fail.
+      DCHECK_EQ(instruction, ctor_fence->InputAt(input_index));
+
+      // Removing the input will also remove the `use_node`.
+      // (Do not look at `use_node` after this, it will be a dangling reference).
+      ctor_fence->RemoveInputAt(input_index);
+
+      // Once all inputs are removed, the fence is considered dead and
+      // is removed.
+      if (ctor_fence->InputCount() == 0u) {
+        ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+      }
+    }
+  }
+
+  if (kIsDebugBuild) {
+    // Post-condition checks:
+    // * None of the uses of `instruction` are a constructor fence.
+    // * The `instruction` itself did not get removed from a block.
+    for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) {
+      CHECK(!use_node.GetUser()->IsConstructorFence());
+    }
+    CHECK(instruction->GetBlock() != nullptr);
+  }
+}
+
 #define DEFINE_ACCEPT(name, super)                                             \
 void H##name::Accept(HGraphVisitor* visitor) {                                 \
   visitor->Visit##name(this);                                                  \
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8368026..e40361e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -46,6 +46,7 @@
 
 class GraphChecker;
 class HBasicBlock;
+class HConstructorFence;
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
@@ -57,6 +58,7 @@
 class HInvoke;
 class HLongConstant;
 class HNullConstant;
+class HParameterValue;
 class HPhi;
 class HSuspendCheck;
 class HTryBoundary;
@@ -537,6 +539,12 @@
     return method_idx_;
   }
 
+  // Get the method name (without the signature), e.g. "<init>"
+  const char* GetMethodName() const;
+
+  // Get the pretty method name (class + name + optionally signature).
+  std::string PrettyMethod(bool with_signature = true) const;
+
   InvokeType GetInvokeType() const {
     return invoke_type_;
   }
@@ -1297,6 +1305,7 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
+  M(ConstructorFence, Instruction)                                      \
   M(CurrentMethod, Instruction)                                         \
   M(ShouldDeoptimizeFlag, Instruction)                                  \
   M(Deoptimize, Instruction)                                            \
@@ -1476,8 +1485,11 @@
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
+  // Get the instruction which has this use as one of the inputs.
   T GetUser() const { return user_; }
+  // Get the position of the input record that this use corresponds to.
   size_t GetIndex() const { return index_; }
+  // Set the position of the input record that this use corresponds to.
   void SetIndex(size_t index) { index_ = index; }
 
   // Hook for the IntrusiveForwardList<>.
@@ -2037,7 +2049,8 @@
         !IsNativeDebugInfo() &&
         !IsParameterValue() &&
         // If we added an explicit barrier then we should keep it.
-        !IsMemoryBarrier();
+        !IsMemoryBarrier() &&
+        !IsConstructorFence();
   }
 
   bool IsDeadAndRemovable() const {
@@ -2431,6 +2444,11 @@
   void InsertInputAt(size_t index, HInstruction* input);
   void RemoveInputAt(size_t index);
 
+  // Removes all the inputs.
+  // Also removes this instructions from each input's use list
+  // (for non-environment uses only).
+  void RemoveAllInputs();
+
  protected:
   HVariableInputSizeInstruction(SideEffects side_effects,
                                 uint32_t dex_pc,
@@ -5069,7 +5087,7 @@
   const DexFile& GetDexFile() const { return dex_file_; }
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
-  bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); }
+  bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
 
   bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
   void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
@@ -5377,10 +5395,16 @@
   }
   bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
     // TODO: We can be smarter here.
-    // Currently, the array access is always preceded by an ArrayLength or a NullCheck
-    // which generates the implicit null check. There are cases when these can be removed
-    // to produce better code. If we ever add optimizations to do so we should allow an
-    // implicit check here (as long as the address falls in the first page).
+    // Currently, unless the array is the result of NewArray, the array access is always
+    // preceded by some form of null NullCheck necessary for the bounds check, usually
+    // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for
+    // dynamic BCE. There are cases when these could be removed to produce better code.
+    // If we ever add optimizations to do so we should allow an implicit check here
+    // (as long as the address falls in the first page).
+    //
+    // As an example of such fancy optimization, we could eliminate BoundsCheck for
+    //     a = cond ? new int[1] : null;
+    //     a[0];  // The Phi does not need bounds check for either input.
     return false;
   }
 
@@ -6501,6 +6525,137 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
+// A constructor fence orders all prior stores to fields that could be accessed via a final field of
+// the specified object(s), with respect to any subsequent store that might "publish"
+// (i.e. make visible) the specified object to another thread.
+//
+// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens
+// for all final fields (that were set) at the end of the invoked constructor.
+//
+// The constructor fence models the freeze actions for the final fields of an object
+// being constructed (semantically at the end of the constructor). Constructor fences
+// have a per-object affinity; two separate objects being constructed get two separate
+// constructor fences.
+//
+// (Note: that if calling a super-constructor or forwarding to another constructor,
+// the freezes would happen at the end of *that* constructor being invoked).
+//
+// The memory model guarantees that when the object being constructed is "published" after
+// constructor completion (i.e. escapes the current thread via a store), then any final field
+// writes must be observable on other threads (once they observe that publication).
+//
+// Further, anything written before the freeze, and read by dereferencing through the final field,
+// must also be visible (so final object field could itself have an object with non-final fields;
+// yet the freeze must also extend to them).
+//
+// Constructor example:
+//
+//     class HasFinal {
+//        final int field;                              Optimizing IR for <init>()V:
+//        HasFinal() {
+//          field = 123;                                HInstanceFieldSet(this, HasFinal.field, 123)
+//          // freeze(this.field);                      HConstructorFence(this)
+//        }                                             HReturn
+//     }
+//
+// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of
+// already-initialized classes; in that case the allocation must act as a "default-initializer"
+// of the object which effectively writes the class pointer "final field".
+//
+// For example, we can model default-initialiation as roughly the equivalent of the following:
+//
+//     class Object {
+//       private final Class header;
+//     }
+//
+//  Java code:                                           Optimizing IR:
+//
+//     T new_instance<T>() {
+//       Object obj = allocate_memory(T.class.size);     obj = HInvoke(art_quick_alloc_object, T)
+//       obj.header = T.class;                           // header write is done by above call.
+//       // freeze(obj.header)                           HConstructorFence(obj)
+//       return (T)obj;
+//     }
+//
+// See also:
+// * CompilerDriver::RequiresConstructorBarrier
+// * QuasiAtomic::ThreadFenceForConstructor
+//
+class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+                                  // A fence has variable inputs because the inputs can be removed
+                                  // after prepare_for_register_allocation phase.
+                                  // (TODO: In the future a fence could freeze multiple objects
+                                  //        after merging two fences together.)
+ public:
+  // `fence_object` is the reference that needs to be protected for correct publication.
+  //
+  // It makes sense in the following situations:
+  // * <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true).
+  // * new-instance-like instructions, it's the return value (i.e. HNewInstance).
+  //
+  // After construction the `fence_object` becomes the 0th input.
+  // This is not an input in a real sense, but just a convenient place to stash the information
+  // about the associated object.
+  HConstructorFence(HInstruction* fence_object,
+                    uint32_t dex_pc,
+                    ArenaAllocator* arena)
+    // We strongly suspect there is not a more accurate way to describe the fine-grained reordering
+    // constraints described in the class header. We claim that these SideEffects constraints
+    // enforce a superset of the real constraints.
+    //
+    // The ordering described above is conservatively modeled with SideEffects as follows:
+    //
+    // * To prevent reordering of the publication stores:
+    // ----> "Reads of objects" is the initial SideEffect.
+    // * For every primitive final field store in the constructor:
+    // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect.
+    // * If there are any stores to reference final fields in the constructor:
+    // ----> Use a more conservative "AllReads" SideEffect because any stores to any references
+    //       that are reachable from `fence_object` also need to be prevented for reordering
+    //       (and we do not want to do alias analysis to figure out what those stores are).
+    //
+    // In the implementation, this initially starts out as an "all reads" side effect; this is an
+    // even more conservative approach than the one described above, and prevents all of the
+    // above reordering without analyzing any of the instructions in the constructor.
+    //
+    // If in a later phase we discover that there are no writes to reference final fields,
+    // we can refine the side effect to a smaller set of type reads (see above constraints).
+      : HVariableInputSizeInstruction(SideEffects::AllReads(),
+                                      dex_pc,
+                                      arena,
+                                      /* number_of_inputs */ 1,
+                                      kArenaAllocConstructorFenceInputs) {
+    DCHECK(fence_object != nullptr);
+    SetRawInputAt(0, fence_object);
+  }
+
+  // The object associated with this constructor fence.
+  //
+  // (Note: This will be null after the prepare_for_register_allocation phase,
+  // as all constructor fence inputs are removed there).
+  HInstruction* GetFenceObject() const {
+    return InputAt(0);
+  }
+
+  // Find all the HConstructorFence uses (`fence_use`) for `this` and:
+  // - Delete `fence_use` from `this`'s use list.
+  // - Delete `this` from `fence_use`'s inputs list.
+  // - If the `fence_use` is dead, remove it from the graph.
+  //
+  // A fence is considered dead once it no longer has any uses
+  // and all of the inputs are dead.
+  //
+  // This must *not* be called during/after prepare_for_register_allocation,
+  // because that removes all the inputs to the fences but the fence is actually
+  // still considered live.
+  static void RemoveConstructorFences(HInstruction* instruction);
+
+  DECLARE_INSTRUCTION(ConstructorFence);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HConstructorFence);
+};
+
 class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 66bfea9..c3c141b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -167,6 +167,13 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  // Delete all the inputs to the constructor fence;
+  // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
+  // LocationSummary in the LocationsBuilder.
+  constructor_fence->RemoveAllInputs();
+}
+
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
     HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 7ffbe44..395d4ba 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -43,6 +43,7 @@
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
+  void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 57223b5..f4afb33 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1356,6 +1356,106 @@
   EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
 }
 
+void Mips64Assembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10);
+}
+
+void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10);
+}
+
 void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 666c693..6ac3361 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -682,6 +682,26 @@
   void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index f2e3b16..084ce6f 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -2668,6 +2668,106 @@
             "mod_u.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Add_aB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Add_aD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ave_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Aver_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.d");
+}
+
 TEST_F(AssemblerMIPS64Test, FaddW) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
             "fadd.w");
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index c286b82..0ff9fc6 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -78,7 +78,7 @@
     mutable FreeByStartSet::const_iterator free_by_start_entry;
   };
   struct FreeBySizeComparator {
-    bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) {
+    bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const {
       if (lhs.size != rhs.size) {
         return lhs.size < rhs.size;
       } else {
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 4cba36a..9fd42d2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -349,23 +349,23 @@
   UsageError("  --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor.");
   UsageError("      Cannot be used together with --profile-file.");
   UsageError("");
-  UsageError("  --swap-file=<file-name>:  specifies a file to use for swap.");
+  UsageError("  --swap-file=<file-name>: specifies a file to use for swap.");
   UsageError("      Example: --swap-file=/data/tmp/swap.001");
   UsageError("");
-  UsageError("  --swap-fd=<file-descriptor>:  specifies a file to use for swap (by descriptor).");
+  UsageError("  --swap-fd=<file-descriptor>: specifies a file to use for swap (by descriptor).");
   UsageError("      Example: --swap-fd=10");
   UsageError("");
-  UsageError("  --swap-dex-size-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("  --swap-dex-size-threshold=<size>: specifies the minimum total dex file size in");
   UsageError("      bytes to allow the use of swap.");
   UsageError("      Example: --swap-dex-size-threshold=1000000");
   UsageError("      Default: %zu", kDefaultMinDexFileCumulativeSizeForSwap);
   UsageError("");
-  UsageError("  --swap-dex-count-threshold=<count>:  specifies the minimum number of dex files to");
+  UsageError("  --swap-dex-count-threshold=<count>: specifies the minimum number of dex files to");
   UsageError("      allow the use of swap.");
   UsageError("      Example: --swap-dex-count-threshold=10");
   UsageError("      Default: %zu", kDefaultMinDexFilesForSwap);
   UsageError("");
-  UsageError("  --very-large-app-threshold=<size>:  specifies the minimum total dex file size in");
+  UsageError("  --very-large-app-threshold=<size>: specifies the minimum total dex file size in");
   UsageError("      bytes to consider the input \"very large\" and punt on the compilation.");
   UsageError("      Example: --very-large-app-threshold=100000000");
   UsageError("");
@@ -380,6 +380,14 @@
   UsageError("");
   UsageError("  --force-determinism: force the compiler to emit a deterministic output.");
   UsageError("");
+  UsageError("  --dump-cfg=<cfg-file>: dump control-flow graphs (CFGs) to specified file.");
+  UsageError("      Example: --dump-cfg=output.cfg");
+  UsageError("");
+  UsageError("  --dump-cfg-append: when dumping CFGs to an existing file, append new CFG data to");
+  UsageError("      existing data (instead of overwriting existing data with new data, which is");
+  UsageError("      the default behavior). This option is only meaningful when used with");
+  UsageError("      --dump-cfg.");
+  UsageError("");
   UsageError("  --classpath-dir=<directory-path>: directory used to resolve relative class paths.");
   UsageError("");
   std::cerr << "See log for usage error information\n";
@@ -2406,6 +2414,8 @@
     if (!IsBootImage()) {
       raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
     }
+    // Never allow implicit image compilation.
+    raw_options.push_back(std::make_pair("-Xnoimage-dex2oat", nullptr));
     // Disable libsigchain. We don't don't need it during compilation and it prevents us
     // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
     raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index a267766..d546072 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -884,6 +884,7 @@
 };
 
 TEST_F(Dex2oatReturnCodeTest, TestCreateRuntime) {
+  TEST_DISABLED_FOR_MEMORY_TOOL();  // b/19100793
   int status = RunTest({ "--boot-image=/this/does/not/exist/yolo.oat" });
   EXPECT_EQ(static_cast<int>(dex2oat::ReturnCode::kCreateRuntime), WEXITSTATUS(status)) << output_;
 }
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index 4b65c52..a2116cd 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -49,6 +49,7 @@
     shared_libs: [
         "libart",
         "libart-dexlayout",
+        "libbase",
     ],
 }
 
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 3f715cf..f1c6f67 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -451,8 +451,8 @@
   }
   uint8_t visibility = annotation->visibility_;
   const uint8_t* annotation_data = annotation->annotation_;
-  EncodedValue* encoded_value =
-      ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0);
+  std::unique_ptr<EncodedValue> encoded_value(
+      ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0));
   // TODO: Calculate the size of the annotation.
   AnnotationItem* annotation_item =
       new AnnotationItem(visibility, encoded_value->ReleaseEncodedAnnotation());
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 38faf96..3c627ea 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -170,14 +170,14 @@
   }
 
   // Open profile file.
-  ProfileCompilationInfo* profile_info = nullptr;
+  std::unique_ptr<ProfileCompilationInfo> profile_info;
   if (options.profile_file_name_) {
     int profile_fd = open(options.profile_file_name_, O_RDONLY);
     if (profile_fd < 0) {
       fprintf(stderr, "Can't open %s\n", options.profile_file_name_);
       return 1;
     }
-    profile_info = new ProfileCompilationInfo();
+    profile_info.reset(new ProfileCompilationInfo());
     if (!profile_info->Load(profile_fd)) {
       fprintf(stderr, "Can't read profile info from %s\n", options.profile_file_name_);
       return 1;
@@ -185,13 +185,19 @@
   }
 
   // Create DexLayout instance.
-  DexLayout dex_layout(options, profile_info, out_file);
+  DexLayout dex_layout(options, profile_info.get(), out_file);
 
   // Process all files supplied on command line.
   int result = 0;
   while (optind < argc) {
     result |= dex_layout.ProcessFile(argv[optind++]);
   }  // while
+
+  if (options.output_file_name_) {
+    CHECK(out_file != nullptr && out_file != stdout);
+    fclose(out_file);
+  }
+
   return result != 0;
 }
 
diff --git a/dexoptanalyzer/dexoptanalyzer.cc b/dexoptanalyzer/dexoptanalyzer.cc
index 965e407..9a2eb7f 100644
--- a/dexoptanalyzer/dexoptanalyzer.cc
+++ b/dexoptanalyzer/dexoptanalyzer.cc
@@ -216,6 +216,8 @@
     if (!CreateRuntime()) {
       return kErrorCannotCreateRuntime;
     }
+    std::unique_ptr<Runtime> runtime(Runtime::Current());
+
     OatFileAssistant oat_file_assistant(dex_file_.c_str(), isa_, /*load_executable*/ false);
     // Always treat elements of the bootclasspath as up-to-date.
     // TODO(calin): this check should be in OatFileAssistant.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index eb57d33..3c60bf4 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -433,6 +433,11 @@
   { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x12, "div_u", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x12, "mod_s", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x12, "mod_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x10, "add_a", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0x10, "ave_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x10, "ave_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x10, "aver_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x10, "aver_u", "Vkmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" },
diff --git a/oatdump/Android.bp b/oatdump/Android.bp
index f1fcf3d..1cd97c2 100644
--- a/oatdump/Android.bp
+++ b/oatdump/Android.bp
@@ -114,5 +114,8 @@
     defaults: [
         "art_gtest_defaults",
     ],
-    srcs: ["oatdump_test.cc"],
+    srcs: [
+        "oatdump_test.cc",
+        "oatdump_image_test.cc",
+    ],
 }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 878d0f2..f07e0f9 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -125,9 +125,12 @@
     std::unique_ptr<const InstructionSetFeatures> features = InstructionSetFeatures::FromBitmap(
         isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap());
 
-    File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
+    std::unique_ptr<File> elf_file(OS::CreateEmptyFile(output_name_.c_str()));
+    if (elf_file == nullptr) {
+      return false;
+    }
     std::unique_ptr<BufferedOutputStream> output_stream(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file.get())));
     builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get()));
 
     builder_->Start();
@@ -182,7 +185,17 @@
 
     builder_->End();
 
-    return builder_->Good();
+    bool ret_value = builder_->Good();
+
+    builder_.reset();
+    output_stream.reset();
+
+    if (elf_file->FlushCloseOrErase() != 0) {
+      return false;
+    }
+    elf_file.reset();
+
+    return ret_value;
   }
 
   void Walk() {
@@ -2842,14 +2855,14 @@
 
 static int SymbolizeOat(const char* oat_filename, std::string& output_name, bool no_bits) {
   std::string error_msg;
-  OatFile* oat_file = OatFile::Open(oat_filename,
-                                    oat_filename,
-                                    nullptr,
-                                    nullptr,
-                                    false,
-                                    /*low_4gb*/false,
-                                    nullptr,
-                                    &error_msg);
+  std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_filename,
+                                                  oat_filename,
+                                                  nullptr,
+                                                  nullptr,
+                                                  false,
+                                                  /*low_4gb*/false,
+                                                  nullptr,
+                                                  &error_msg));
   if (oat_file == nullptr) {
     fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str());
     return EXIT_FAILURE;
@@ -2859,10 +2872,10 @@
   // Try to produce an ELF file of the same type. This is finicky, as we have used 32-bit ELF
   // files for 64-bit code in the past.
   if (Is64BitInstructionSet(oat_file->GetOatHeader().GetInstructionSet())) {
-    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file, output_name, no_bits);
+    OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file.get(), output_name, no_bits);
     result = oat_symbolizer.Symbolize();
   } else {
-    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file, output_name, no_bits);
+    OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file.get(), output_name, no_bits);
     result = oat_symbolizer.Symbolize();
   }
   if (!result) {
diff --git a/oatdump/oatdump_image_test.cc b/oatdump/oatdump_image_test.cc
new file mode 100644
index 0000000..e9cc922
--- /dev/null
+++ b/oatdump/oatdump_image_test.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "oatdump_test.h"
+
+namespace art {
+
+// Disable tests on arm and mips as they are taking too long to run. b/27824283.
+#if !defined(__arm__) && !defined(__mips__)
+TEST_F(OatDumpTest, TestImage) {
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
+}
+TEST_F(OatDumpTest, TestImageStatic) {
+  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
+}
+
+TEST_F(OatDumpTest, TestOatImage) {
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
+}
+TEST_F(OatDumpTest, TestOatImageStatic) {
+  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
+  std::string error_msg;
+  ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
+}
+#endif
+}  // namespace art
diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc
index c7c3ddd..7260d74 100644
--- a/oatdump/oatdump_test.cc
+++ b/oatdump/oatdump_test.cc
@@ -14,235 +14,12 @@
  * limitations under the License.
  */
 
-#include <sstream>
-#include <string>
-#include <vector>
-
-#include "android-base/strings.h"
-
-#include "common_runtime_test.h"
-
-#include "base/unix_file/fd_file.h"
-#include "runtime/arch/instruction_set.h"
-#include "runtime/exec_utils.h"
-#include "runtime/gc/heap.h"
-#include "runtime/gc/space/image_space.h"
-#include "runtime/os.h"
-#include "runtime/utils.h"
-#include "utils.h"
-
-#include <sys/types.h>
-#include <unistd.h>
+#include "oatdump_test.h"
 
 namespace art {
 
-class OatDumpTest : public CommonRuntimeTest {
- protected:
-  virtual void SetUp() {
-    CommonRuntimeTest::SetUp();
-    core_art_location_ = GetCoreArtLocation();
-    core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA);
-  }
-
-  // Linking flavor.
-  enum Flavor {
-    kDynamic,  // oatdump(d)
-    kStatic,   // oatdump(d)s
-  };
-
-  // Returns path to the oatdump binary.
-  std::string GetOatDumpFilePath(Flavor flavor) {
-    std::string root = GetTestAndroidRoot();
-    root += "/bin/oatdump";
-    if (kIsDebugBuild) {
-      root += "d";
-    }
-    if (flavor == kStatic) {
-      root += "s";
-    }
-    return root;
-  }
-
-  enum Mode {
-    kModeOat,
-    kModeArt,
-    kModeSymbolize,
-  };
-
-  // Display style.
-  enum Display {
-    kListOnly,
-    kListAndCode
-  };
-
-  // Run the test with custom arguments.
-  bool Exec(Flavor flavor,
-            Mode mode,
-            const std::vector<std::string>& args,
-            Display display,
-            std::string* error_msg) {
-    std::string file_path = GetOatDumpFilePath(flavor);
-
-    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
-
-    // ScratchFile scratch;
-    std::vector<std::string> exec_argv = { file_path };
-    std::vector<std::string> expected_prefixes;
-    if (mode == kModeSymbolize) {
-      exec_argv.push_back("--symbolize=" + core_oat_location_);
-      exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
-    } else {
-      expected_prefixes.push_back("Dex file data for");
-      expected_prefixes.push_back("Num string ids:");
-      expected_prefixes.push_back("Num field ids:");
-      expected_prefixes.push_back("Num method ids:");
-      expected_prefixes.push_back("LOCATION:");
-      expected_prefixes.push_back("MAGIC:");
-      expected_prefixes.push_back("DEX FILE COUNT:");
-      if (display == kListAndCode) {
-        // Code and dex code do not show up if list only.
-        expected_prefixes.push_back("DEX CODE:");
-        expected_prefixes.push_back("CODE:");
-        expected_prefixes.push_back("CodeInfoEncoding");
-        expected_prefixes.push_back("CodeInfoInlineInfo");
-      }
-      if (mode == kModeArt) {
-        exec_argv.push_back("--image=" + core_art_location_);
-        exec_argv.push_back("--instruction-set=" + std::string(
-            GetInstructionSetString(kRuntimeISA)));
-        expected_prefixes.push_back("IMAGE LOCATION:");
-        expected_prefixes.push_back("IMAGE BEGIN:");
-        expected_prefixes.push_back("kDexCaches:");
-      } else {
-        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
-        exec_argv.push_back("--oat-file=" + core_oat_location_);
-      }
-    }
-    exec_argv.insert(exec_argv.end(), args.begin(), args.end());
-
-    bool result = true;
-    // We must set --android-root.
-    int link[2];
-    if (pipe(link) == -1) {
-      *error_msg = strerror(errno);
-      return false;
-    }
-
-    const pid_t pid = fork();
-    if (pid == -1) {
-      *error_msg = strerror(errno);
-      return false;
-    }
-
-    if (pid == 0) {
-      dup2(link[1], STDOUT_FILENO);
-      close(link[0]);
-      close(link[1]);
-      // change process groups, so we don't get reaped by ProcessManager
-      setpgid(0, 0);
-      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
-      std::vector<char*> argv;
-      for (size_t i = 0; i < exec_argv.size(); ++i) {
-        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
-      }
-      argv.push_back(nullptr);
-      UNUSED(execv(argv[0], &argv[0]));
-      const std::string command_line(android::base::Join(exec_argv, ' '));
-      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
-      // _exit to avoid atexit handlers in child.
-      _exit(1);
-    } else {
-      close(link[1]);
-      static const size_t kLineMax = 256;
-      char line[kLineMax] = {};
-      size_t line_len = 0;
-      size_t total = 0;
-      std::vector<bool> found(expected_prefixes.size(), false);
-      while (true) {
-        while (true) {
-          size_t spaces = 0;
-          // Trim spaces at the start of the line.
-          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
-          if (spaces > 0) {
-            line_len -= spaces;
-            memmove(&line[0], &line[spaces], line_len);
-          }
-          ssize_t bytes_read =
-              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
-          if (bytes_read <= 0) {
-            break;
-          }
-          line_len += bytes_read;
-          total += bytes_read;
-        }
-        if (line_len == 0) {
-          break;
-        }
-        // Check contents.
-        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
-          const std::string& expected = expected_prefixes[i];
-          if (!found[i] &&
-              line_len >= expected.length() &&
-              memcmp(line, expected.c_str(), expected.length()) == 0) {
-            found[i] = true;
-          }
-        }
-        // Skip to next line.
-        size_t next_line = 0;
-        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
-        line_len -= next_line + 1;
-        memmove(&line[0], &line[next_line + 1], line_len);
-      }
-      if (mode == kModeSymbolize) {
-        EXPECT_EQ(total, 0u);
-      } else {
-        EXPECT_GT(total, 0u);
-      }
-      LOG(INFO) << "Processed bytes " << total;
-      close(link[0]);
-      int status = 0;
-      if (waitpid(pid, &status, 0) != -1) {
-        result = (status == 0);
-      }
-
-      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
-        if (!found[i]) {
-          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
-          result = false;
-        }
-      }
-    }
-
-    return result;
-  }
-
- private:
-  std::string core_art_location_;
-  std::string core_oat_location_;
-};
-
 // Disable tests on arm and mips as they are taking too long to run. b/27824283.
 #if !defined(__arm__) && !defined(__mips__)
-TEST_F(OatDumpTest, TestImage) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
-}
-TEST_F(OatDumpTest, TestImageStatic) {
-  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg;
-}
-
-TEST_F(OatDumpTest, TestOatImage) {
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
-}
-TEST_F(OatDumpTest, TestOatImageStatic) {
-  TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS();
-  std::string error_msg;
-  ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg;
-}
-
 TEST_F(OatDumpTest, TestNoDumpVmap) {
   std::string error_msg;
   ASSERT_TRUE(Exec(kDynamic, kModeArt, {"--no-dump:vmap"}, kListAndCode, &error_msg)) << error_msg;
diff --git a/oatdump/oatdump_test.h b/oatdump/oatdump_test.h
new file mode 100644
index 0000000..48e9eb5
--- /dev/null
+++ b/oatdump/oatdump_test.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_OATDUMP_OATDUMP_TEST_H_
+#define ART_OATDUMP_OATDUMP_TEST_H_
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "android-base/strings.h"
+
+#include "common_runtime_test.h"
+
+#include "base/unix_file/fd_file.h"
+#include "runtime/arch/instruction_set.h"
+#include "runtime/exec_utils.h"
+#include "runtime/gc/heap.h"
+#include "runtime/gc/space/image_space.h"
+#include "runtime/os.h"
+#include "runtime/utils.h"
+#include "utils.h"
+
+#include <sys/types.h>
+#include <unistd.h>
+
+namespace art {
+
+class OatDumpTest : public CommonRuntimeTest {
+ protected:
+  virtual void SetUp() {
+    CommonRuntimeTest::SetUp();
+    core_art_location_ = GetCoreArtLocation();
+    core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA);
+  }
+
+  // Linking flavor.
+  enum Flavor {
+    kDynamic,  // oatdump(d)
+    kStatic,   // oatdump(d)s
+  };
+
+  // Returns path to the oatdump binary.
+  std::string GetOatDumpFilePath(Flavor flavor) {
+    std::string root = GetTestAndroidRoot();
+    root += "/bin/oatdump";
+    if (kIsDebugBuild) {
+      root += "d";
+    }
+    if (flavor == kStatic) {
+      root += "s";
+    }
+    return root;
+  }
+
+  enum Mode {
+    kModeOat,
+    kModeArt,
+    kModeSymbolize,
+  };
+
+  // Display style.
+  enum Display {
+    kListOnly,
+    kListAndCode
+  };
+
+  // Run the test with custom arguments.
+  bool Exec(Flavor flavor,
+            Mode mode,
+            const std::vector<std::string>& args,
+            Display display,
+            std::string* error_msg) {
+    std::string file_path = GetOatDumpFilePath(flavor);
+
+    EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path";
+
+    // ScratchFile scratch;
+    std::vector<std::string> exec_argv = { file_path };
+    std::vector<std::string> expected_prefixes;
+    if (mode == kModeSymbolize) {
+      exec_argv.push_back("--symbolize=" + core_oat_location_);
+      exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize");
+    } else {
+      expected_prefixes.push_back("Dex file data for");
+      expected_prefixes.push_back("Num string ids:");
+      expected_prefixes.push_back("Num field ids:");
+      expected_prefixes.push_back("Num method ids:");
+      expected_prefixes.push_back("LOCATION:");
+      expected_prefixes.push_back("MAGIC:");
+      expected_prefixes.push_back("DEX FILE COUNT:");
+      if (display == kListAndCode) {
+        // Code and dex code do not show up if list only.
+        expected_prefixes.push_back("DEX CODE:");
+        expected_prefixes.push_back("CODE:");
+        expected_prefixes.push_back("CodeInfoEncoding");
+        expected_prefixes.push_back("CodeInfoInlineInfo");
+      }
+      if (mode == kModeArt) {
+        exec_argv.push_back("--image=" + core_art_location_);
+        exec_argv.push_back("--instruction-set=" + std::string(
+            GetInstructionSetString(kRuntimeISA)));
+        expected_prefixes.push_back("IMAGE LOCATION:");
+        expected_prefixes.push_back("IMAGE BEGIN:");
+        expected_prefixes.push_back("kDexCaches:");
+      } else {
+        CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat));
+        exec_argv.push_back("--oat-file=" + core_oat_location_);
+      }
+    }
+    exec_argv.insert(exec_argv.end(), args.begin(), args.end());
+
+    bool result = true;
+    // We must set --android-root.
+    int link[2];
+    if (pipe(link) == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    const pid_t pid = fork();
+    if (pid == -1) {
+      *error_msg = strerror(errno);
+      return false;
+    }
+
+    if (pid == 0) {
+      dup2(link[1], STDOUT_FILENO);
+      close(link[0]);
+      close(link[1]);
+      // change process groups, so we don't get reaped by ProcessManager
+      setpgid(0, 0);
+      // Use execv here rather than art::Exec to avoid blocking on waitpid here.
+      std::vector<char*> argv;
+      for (size_t i = 0; i < exec_argv.size(); ++i) {
+        argv.push_back(const_cast<char*>(exec_argv[i].c_str()));
+      }
+      argv.push_back(nullptr);
+      UNUSED(execv(argv[0], &argv[0]));
+      const std::string command_line(android::base::Join(exec_argv, ' '));
+      PLOG(ERROR) << "Failed to execv(" << command_line << ")";
+      // _exit to avoid atexit handlers in child.
+      _exit(1);
+    } else {
+      close(link[1]);
+      static const size_t kLineMax = 256;
+      char line[kLineMax] = {};
+      size_t line_len = 0;
+      size_t total = 0;
+      std::vector<bool> found(expected_prefixes.size(), false);
+      while (true) {
+        while (true) {
+          size_t spaces = 0;
+          // Trim spaces at the start of the line.
+          for (; spaces < line_len && isspace(line[spaces]); ++spaces) {}
+          if (spaces > 0) {
+            line_len -= spaces;
+            memmove(&line[0], &line[spaces], line_len);
+          }
+          ssize_t bytes_read =
+              TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len));
+          if (bytes_read <= 0) {
+            break;
+          }
+          line_len += bytes_read;
+          total += bytes_read;
+        }
+        if (line_len == 0) {
+          break;
+        }
+        // Check contents.
+        for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+          const std::string& expected = expected_prefixes[i];
+          if (!found[i] &&
+              line_len >= expected.length() &&
+              memcmp(line, expected.c_str(), expected.length()) == 0) {
+            found[i] = true;
+          }
+        }
+        // Skip to next line.
+        size_t next_line = 0;
+        for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {}
+        line_len -= next_line + 1;
+        memmove(&line[0], &line[next_line + 1], line_len);
+      }
+      if (mode == kModeSymbolize) {
+        EXPECT_EQ(total, 0u);
+      } else {
+        EXPECT_GT(total, 0u);
+      }
+      LOG(INFO) << "Processed bytes " << total;
+      close(link[0]);
+      int status = 0;
+      if (waitpid(pid, &status, 0) != -1) {
+        result = (status == 0);
+      }
+
+      for (size_t i = 0; i < expected_prefixes.size(); ++i) {
+        if (!found[i]) {
+          LOG(ERROR) << "Did not find prefix " << expected_prefixes[i];
+          result = false;
+        }
+      }
+    }
+
+    return result;
+  }
+
+ private:
+  std::string core_art_location_;
+  std::string core_oat_location_;
+};
+
+}  // namespace art
+
+#endif  // ART_OATDUMP_OATDUMP_TEST_H_
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index fbb0978..e750ede 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -30,6 +30,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/dumpable.h"
+#include "base/memory_tool.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
 #include "base/unix_file/fd_file.h"
@@ -142,6 +143,8 @@
     LOG(ERROR) << "Unable to initialize runtime";
     return false;
   }
+  std::unique_ptr<Runtime> runtime(Runtime::Current());
+
   // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
   // give it away now and then switch to a more manageable ScopedObjectAccess.
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
@@ -286,6 +289,13 @@
       return false;
     }
   }
+
+  if (!kIsDebugBuild && !(RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks)) {
+    // We want to just exit on non-debug builds, not bringing the runtime down
+    // in an orderly fashion. So release the following fields.
+    runtime.release();
+  }
+
   return true;
 }
 
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 029de46..a277edf 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -872,7 +872,7 @@
     POISON_HEAP_REF r2
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
-    lsr r0, r0, #7
+    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
     strb r3, [r3, r0]
     blx lr
 .Ldo_aput_null:
@@ -900,7 +900,7 @@
     POISON_HEAP_REF r2
     str r2, [r3, r1, lsl #2]
     ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
-    lsr r0, r0, #7
+    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
     strb r3, [r3, r0]
     blx lr
 .Lthrow_array_store_exception:
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index d043962..c555126 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1416,7 +1416,7 @@
     POISON_HEAP_REF w2
     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
-    lsr x0, x0, #7
+    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
     strb w3, [x3, x0]
     ret
 .Ldo_aput_null:
@@ -1447,7 +1447,7 @@
     POISON_HEAP_REF w2
     str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
-    lsr x0, x0, #7
+    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
     strb w3, [x3, x0]
     ret
     .cfi_restore_state            // Reset unwind info so following code unwinds.
diff --git a/runtime/arch/context-inl.h b/runtime/arch/context-inl.h
new file mode 100644
index 0000000..ddcbbb1
--- /dev/null
+++ b/runtime/arch/context-inl.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is special-purpose for cases where you want a stack context. Most users should use
+// Context::Create().
+
+#include "context.h"
+
+#ifndef ART_RUNTIME_ARCH_CONTEXT_INL_H_
+#define ART_RUNTIME_ARCH_CONTEXT_INL_H_
+
+#if defined(__arm__)
+#include "arm/context_arm.h"
+#define RUNTIME_CONTEXT_TYPE arm::ArmContext
+#elif defined(__aarch64__)
+#include "arm64/context_arm64.h"
+#define RUNTIME_CONTEXT_TYPE arm64::Arm64Context
+#elif defined(__mips__) && !defined(__LP64__)
+#include "mips/context_mips.h"
+#define RUNTIME_CONTEXT_TYPE mips::MipsContext
+#elif defined(__mips__) && defined(__LP64__)
+#include "mips64/context_mips64.h"
+#define RUNTIME_CONTEXT_TYPE mips64::Mips64Context
+#elif defined(__i386__)
+#include "x86/context_x86.h"
+#define RUNTIME_CONTEXT_TYPE x86::X86Context
+#elif defined(__x86_64__)
+#include "x86_64/context_x86_64.h"
+#define RUNTIME_CONTEXT_TYPE x86_64::X86_64Context
+#else
+#error unimplemented
+#endif
+
+namespace art {
+
+using RuntimeContextType = RUNTIME_CONTEXT_TYPE;
+
+}  // namespace art
+
+#undef RUNTIME_CONTEXT_TYPE
+
+#endif  // ART_RUNTIME_ARCH_CONTEXT_INL_H_
diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc
index bf40a3f..82d8b6c 100644
--- a/runtime/arch/context.cc
+++ b/runtime/arch/context.cc
@@ -14,43 +14,12 @@
  * limitations under the License.
  */
 
-#include "context.h"
-
-#if defined(__arm__)
-#include "arm/context_arm.h"
-#elif defined(__aarch64__)
-#include "arm64/context_arm64.h"
-#elif defined(__mips__) && !defined(__LP64__)
-#include "mips/context_mips.h"
-#elif defined(__mips__) && defined(__LP64__)
-#include "mips64/context_mips64.h"
-#elif defined(__i386__)
-#include "x86/context_x86.h"
-#elif defined(__x86_64__)
-#include "x86_64/context_x86_64.h"
-#else
-#include "base/logging.h"
-#endif
+#include "context-inl.h"
 
 namespace art {
 
 Context* Context::Create() {
-#if defined(__arm__)
-  return new arm::ArmContext();
-#elif defined(__aarch64__)
-  return new arm64::Arm64Context();
-#elif defined(__mips__) && !defined(__LP64__)
-  return new mips::MipsContext();
-#elif defined(__mips__) && defined(__LP64__)
-  return new mips64::Mips64Context();
-#elif defined(__i386__)
-  return new x86::X86Context();
-#elif defined(__x86_64__)
-  return new x86_64::X86_64Context();
-#else
-  UNIMPLEMENTED(FATAL);
-  return nullptr;
-#endif
+  return new RuntimeContextType;
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 722a679..61a3a04 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1406,7 +1406,7 @@
     POISON_HEAP_REF $a2
     sw  $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     lw  $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
-    srl $t1, $a0, 7
+    srl $t1, $a0, CARD_TABLE_CARD_SHIFT
     add $t1, $t1, $t0
     sb  $t0, ($t1)
     jalr $zero, $ra
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 9402232..24caa0e 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1374,7 +1374,7 @@
     POISON_HEAP_REF $a2
     sw   $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0)
     ld   $t0, THREAD_CARD_TABLE_OFFSET(rSELF)
-    dsrl  $t1, $a0, 7
+    dsrl  $t1, $a0, CARD_TABLE_CARD_SHIFT
     daddu $t1, $t1, $t0
     sb   $t0, ($t1)
     jalr $zero, $ra
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 6c0bcc9..3694c3e 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1526,7 +1526,7 @@
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
-    shrl LITERAL(7), %eax
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
     movb %dl, (%edx, %eax)
     ret
 .Ldo_aput_null:
@@ -1567,7 +1567,7 @@
     POISON_HEAP_REF edx
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4)  // do the aput
     movl %fs:THREAD_CARD_TABLE_OFFSET, %edx
-    shrl LITERAL(7), %eax
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax
     movb %dl, (%edx, %eax)
     ret
     CFI_ADJUST_CFA_OFFSET(12)     // 3 POP after the jz for unwinding.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 8e2acab..ad7c2b3 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1504,8 +1504,8 @@
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
-    shrl LITERAL(7), %edi
-//  shrl LITERAL(7), %rdi
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
+//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
     ret
 .Ldo_aput_null:
@@ -1545,8 +1545,8 @@
     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
-    shrl LITERAL(7), %edi
-//  shrl LITERAL(7), %rdi
+    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
+//  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
 //  movb %dl, (%rdx, %rdi)
     ret
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 935fd81..136ed12 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -33,6 +33,7 @@
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
+  // Every name should have the same width and end with a space. Abbreviate if necessary:
   "Misc         ",
   "SwitchTbl    ",
   "SlowPaths    ",
@@ -49,6 +50,7 @@
   "Successors   ",
   "Dominated    ",
   "Instruction  ",
+  "CtorFenceIns ",
   "InvokeInputs ",
   "PhiInputs    ",
   "LoopInfo     ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index c39429c..60b6ea8 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -59,6 +59,7 @@
   kArenaAllocSuccessors,
   kArenaAllocDominated,
   kArenaAllocInstruction,
+  kArenaAllocConstructorFenceInputs,
   kArenaAllocInvokeInputs,
   kArenaAllocPhiInputs,
   kArenaAllocLoopInfo,
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index 6b67864..c5b0af6 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -98,7 +98,9 @@
       // Check that the value is within the upper limit of Dest.
       (static_cast<uintmax_t>(std::numeric_limits<Dest>::max()) >=
           static_cast<uintmax_t>(std::numeric_limits<Source>::max()) ||
-          source <= static_cast<Source>(std::numeric_limits<Dest>::max())));
+          source <= static_cast<Source>(std::numeric_limits<Dest>::max())))
+      << "dchecked_integral_cast failed for " << source
+      << " (would be " << static_cast<Dest>(source) << ")";
 
   return static_cast<Dest>(source);
 }
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index bfa273d..56e8aa3 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -243,6 +243,12 @@
     return; \
   }
 
+#define TEST_DISABLED_FOR_MEMORY_TOOL() \
+  if (RUNNING_ON_MEMORY_TOOL > 0) { \
+    printf("WARNING: TEST DISABLED FOR MEMORY TOOL\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 7d56bca..1397916 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -1135,7 +1135,7 @@
 bool GetParametersMetadataForMethod(ArtMethod* method,
                                     MutableHandle<mirror::ObjectArray<mirror::String>>* names,
                                     MutableHandle<mirror::IntArray>* access_flags) {
-  const DexFile::AnnotationSetItem::AnnotationSetItem* annotation_set =
+  const DexFile::AnnotationSetItem* annotation_set =
       FindAnnotationSetForMethod(method);
   if (annotation_set == nullptr) {
     return false;
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 68ef15d..cd30d9d 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -47,7 +47,7 @@
 // WriteBarrier, and from there to here.
 class CardTable {
  public:
-  static constexpr size_t kCardShift = 7;
+  static constexpr size_t kCardShift = 10;
   static constexpr size_t kCardSize = 1 << kCardShift;
   static constexpr uint8_t kCardClean = 0x0;
   static constexpr uint8_t kCardDirty = 0x70;
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 34e30c1..c416b9c 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -391,7 +391,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     live_bitmap->VisitMarkedRange(start,
                                   end,
-                                  [this, callback, arg](mirror::Object* obj) {
+                                  [callback, arg](mirror::Object* obj) {
       callback(obj, arg);
     });
   }
@@ -402,7 +402,7 @@
     uintptr_t end = start + CardTable::kCardSize;
     live_bitmap->VisitMarkedRange(start,
                                   end,
-                                  [this, callback, arg](mirror::Object* obj) {
+                                  [callback, arg](mirror::Object* obj) {
       callback(obj, arg);
     });
   }
@@ -560,7 +560,7 @@
             << start << " " << *space_;
         space_->GetLiveBitmap()->VisitMarkedRange(start,
                                                   start + CardTable::kCardSize,
-                                                  [this, callback, arg](mirror::Object* obj) {
+                                                  [callback, arg](mirror::Object* obj) {
           callback(obj, arg);
         });
       });
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 015e6ba..e27c1ec 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -378,8 +378,15 @@
         cc->VerifyGrayImmuneObjects();
       }
     }
-    cc->java_lang_Object_ = down_cast<mirror::Class*>(cc->Mark(
-        WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object).Ptr()));
+    // May be null during runtime creation, in this case leave java_lang_Object null.
+    // This is safe since single threaded behavior should mean FillDummyObject does not
+    // happen when java_lang_Object_ is null.
+    if (WellKnownClasses::java_lang_Object != nullptr) {
+      cc->java_lang_Object_ = down_cast<mirror::Class*>(cc->Mark(
+          WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object).Ptr()));
+    } else {
+      cc->java_lang_Object_ = nullptr;
+    }
   }
 
  private:
@@ -2073,6 +2080,7 @@
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
     // An int array is too big. Use java.lang.Object.
+    CHECK(java_lang_Object_ != nullptr);
     AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object_);
     CHECK_EQ(byte_size, (java_lang_Object_->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
     dummy_obj->SetClass(java_lang_Object_);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index cab293f..9d3d950 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -140,7 +140,7 @@
       }
     } else {
       DCHECK(!space_->HasAddress(obj));
-      auto slow_path = [this](const mirror::Object* ref)
+      auto slow_path = [](const mirror::Object* ref)
           REQUIRES_SHARED(Locks::mutator_lock_) {
         // Marking a large object, make sure its aligned as a sanity check.
         if (!IsAligned<kPageSize>(ref)) {
diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc
index c8233e3..2cdfc16 100644
--- a/runtime/gc/heap_verification_test.cc
+++ b/runtime/gc/heap_verification_test.cc
@@ -16,6 +16,7 @@
 
 #include "common_runtime_test.h"
 
+#include "base/memory_tool.h"
 #include "class_linker.h"
 #include "handle_scope-inl.h"
 #include "mirror/object-inl.h"
@@ -63,7 +64,7 @@
       reinterpret_cast<const void*>(&uint_klass)));
 }
 
-TEST_F(VerificationTest, IsValidClass) {
+TEST_F(VerificationTest, IsValidClassOrNotInHeap) {
   ScopedObjectAccess soa(Thread::Current());
   VariableSizedHandleScope hs(soa.Self());
   Handle<mirror::String> string(
@@ -72,14 +73,35 @@
   EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(1)));
   EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(4)));
   EXPECT_FALSE(v->IsValidClass(nullptr));
-  EXPECT_FALSE(v->IsValidClass(string.Get()));
   EXPECT_TRUE(v->IsValidClass(string->GetClass()));
+  EXPECT_FALSE(v->IsValidClass(string.Get()));
+}
+
+TEST_F(VerificationTest, IsValidClassInHeap) {
+  TEST_DISABLED_FOR_MEMORY_TOOL();
+  ScopedObjectAccess soa(Thread::Current());
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::String> string(
+      hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "test")));
+  const Verification* const v = Runtime::Current()->GetHeap()->GetVerification();
   const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass());
   EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(uint_klass - kObjectAlignment)));
   EXPECT_FALSE(v->IsValidClass(reinterpret_cast<const void*>(&uint_klass)));
 }
 
-TEST_F(VerificationTest, DumpObjectInfo) {
+TEST_F(VerificationTest, DumpInvalidObjectInfo) {
+  ScopedLogSeverity sls(LogSeverity::INFO);
+  ScopedObjectAccess soa(Thread::Current());
+  Runtime* const runtime = Runtime::Current();
+  VariableSizedHandleScope hs(soa.Self());
+  const Verification* const v = runtime->GetHeap()->GetVerification();
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(1), "obj");
+  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(4), "obj");
+  LOG(INFO) << v->DumpObjectInfo(nullptr, "obj");
+}
+
+TEST_F(VerificationTest, DumpValidObjectInfo) {
+  TEST_DISABLED_FOR_MEMORY_TOOL();
   ScopedLogSeverity sls(LogSeverity::INFO);
   ScopedObjectAccess soa(Thread::Current());
   Runtime* const runtime = Runtime::Current();
@@ -89,9 +111,6 @@
   Handle<mirror::ObjectArray<mirror::Object>> arr(
       hs.NewHandle(AllocObjectArray<mirror::Object>(soa.Self(), 256)));
   const Verification* const v = runtime->GetHeap()->GetVerification();
-  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(1), "obj");
-  LOG(INFO) << v->DumpObjectInfo(reinterpret_cast<const void*>(4), "obj");
-  LOG(INFO) << v->DumpObjectInfo(nullptr, "obj");
   LOG(INFO) << v->DumpObjectInfo(string.Get(), "test");
   LOG(INFO) << v->DumpObjectInfo(string->GetClass(), "obj");
   const uintptr_t uint_klass = reinterpret_cast<uintptr_t>(string->GetClass());
@@ -102,6 +121,7 @@
 }
 
 TEST_F(VerificationTest, LogHeapCorruption) {
+  TEST_DISABLED_FOR_MEMORY_TOOL();
   ScopedLogSeverity sls(LogSeverity::INFO);
   ScopedObjectAccess soa(Thread::Current());
   Runtime* const runtime = Runtime::Current();
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index e9f0758..748d378 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -651,7 +651,8 @@
               bitmap_name,
               image_bitmap_map.release(),
               reinterpret_cast<uint8_t*>(map->Begin()),
-              image_objects.End()));
+              // Make sure the bitmap is aligned to card size instead of just bitmap word size.
+              RoundUp(image_objects.End(), gc::accounting::CardTable::kCardSize)));
       if (bitmap == nullptr) {
         *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str());
         return nullptr;
@@ -1695,6 +1696,29 @@
   return true;
 }
 
+ImageSpace::~ImageSpace() {
+  Runtime* runtime = Runtime::Current();
+  if (runtime == nullptr) {
+    return;
+  }
+
+  if (GetImageHeader().IsAppImage()) {
+    // This image space did not modify resolution method then in Init.
+    return;
+  }
+
+  if (!runtime->HasResolutionMethod()) {
+    // Another image space has already unloaded the below methods.
+    return;
+  }
+
+  runtime->ClearInstructionSet();
+  runtime->ClearResolutionMethod();
+  runtime->ClearImtConflictMethod();
+  runtime->ClearImtUnimplementedMethod();
+  runtime->ClearCalleeSaveMethods();
+}
+
 std::unique_ptr<ImageSpace> ImageSpace::CreateFromAppImage(const char* image,
                                                            const OatFile* oat_file,
                                                            std::string* error_msg) {
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 199bbdd..aa3dd42 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -159,6 +159,9 @@
 
   void DumpSections(std::ostream& os) const;
 
+  // De-initialize the image-space by undoing the effects in Init().
+  virtual ~ImageSpace();
+
  protected:
   // Tries to initialize an ImageSpace from the given image path, returning null on error.
   //
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index af57397..06638e7 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -78,6 +78,8 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize))))
 #define STRING_DEX_CACHE_ELEMENT_SIZE 8
 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE), (static_cast<int32_t>(sizeof(art::mirror::StringDexCachePair))))
+#define CARD_TABLE_CARD_SHIFT 0xa
+DEFINE_CHECK_EQ(static_cast<size_t>(CARD_TABLE_CARD_SHIFT), (static_cast<size_t>(art::gc::accounting::CardTable::kCardShift)))
 #define MIN_LARGE_OBJECT_THRESHOLD 0x3000
 DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold)))
 #define LOCK_WORD_STATE_SHIFT 30
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 4f390fd..8bdf6b1 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -34,7 +34,6 @@
 #include <time.h>
 #include <time.h>
 #include <unistd.h>
-
 #include <set>
 
 #include "android-base/stringprintf.h"
@@ -502,9 +501,16 @@
   void DumpHeapArray(mirror::Array* obj, mirror::Class* klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass)
+  void DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void DumpHeapInstanceObject(mirror::Object* obj,
+                              mirror::Class* klass,
+                              const std::set<mirror::Object*>& fake_roots)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool AddRuntimeInternalObjectsField(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
   void ProcessHeap(bool header_first)
       REQUIRES(Locks::mutator_lock_) {
     // Reset current heap and object count.
@@ -1062,37 +1068,17 @@
   ++objects_in_segment_;
 }
 
-// Use for visiting the GcRoots held live by ArtFields, ArtMethods, and ClassLoaders.
-class GcRootVisitor {
- public:
-  explicit GcRootVisitor(Hprof* hprof) : hprof_(hprof) {}
-
-  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
-                  MemberOffset offset ATTRIBUTE_UNUSED,
-                  bool is_static ATTRIBUTE_UNUSED) const {}
-
-  // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be
-  // running during the hprof dumping process.
-  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!root->IsNull()) {
-      VisitRoot(root);
-    }
+bool Hprof::AddRuntimeInternalObjectsField(mirror::Class* klass) {
+  if (klass->IsDexCacheClass()) {
+    return true;
   }
-
-  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Object* obj = root->AsMirrorPtr();
-    // The two cases are either classes or dex cache arrays. If it is a dex cache array, then use
-    // VM internal. Otherwise the object is a declaring class of an ArtField or ArtMethod or a
-    // class from a ClassLoader.
-    hprof_->VisitRoot(obj, RootInfo(obj->IsClass() ? kRootStickyClass : kRootVMInternal));
+  // IsClassLoaderClass is true for subclasses of classloader but we only want to add the fake
+  // field to the java.lang.ClassLoader class.
+  if (klass->IsClassLoaderClass() && klass->GetSuperClass()->IsObjectClass()) {
+    return true;
   }
-
-
- private:
-  Hprof* const hprof_;
-};
+  return false;
+}
 
 void Hprof::DumpHeapObject(mirror::Object* obj) {
   // Ignore classes that are retired.
@@ -1103,8 +1089,41 @@
 
   ++total_objects_;
 
-  GcRootVisitor visitor(this);
-  obj->VisitReferences(visitor, VoidFunctor());
+  class RootCollector {
+   public:
+    explicit RootCollector() {}
+
+    void operator()(mirror::Object*, MemberOffset, bool) const {}
+
+    // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be
+    // running during the hprof dumping process.
+    void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      if (!root->IsNull()) {
+        VisitRoot(root);
+      }
+    }
+
+    void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      roots_.insert(root->AsMirrorPtr());
+    }
+
+    const std::set<mirror::Object*>& GetRoots() const {
+      return roots_;
+    }
+
+   private:
+    // These roots are actually live from the object. Avoid marking them as roots in hprof to make
+    // it easier to debug class unloading.
+    mutable std::set<mirror::Object*> roots_;
+  };
+
+  RootCollector visitor;
+  // Collect all native roots.
+  if (!obj->IsClass()) {
+    obj->VisitReferences(visitor, VoidFunctor());
+  }
 
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true);
@@ -1112,15 +1131,18 @@
   if (space != nullptr) {
     if (space->IsZygoteSpace()) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) {
       // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects as
       // HPROF_HEAP_APP. b/35762934
       heap_type = HPROF_HEAP_IMAGE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   } else {
     const auto* los = heap->GetLargeObjectsSpace();
     if (los->Contains(obj) && los->IsZygoteLargeObject(Thread::Current(), obj)) {
       heap_type = HPROF_HEAP_ZYGOTE;
+      VisitRoot(obj, RootInfo(kRootVMInternal));
     }
   }
   CheckHeapSegmentConstraints();
@@ -1164,7 +1186,7 @@
     } else if (c->IsArrayClass()) {
       DumpHeapArray(obj->AsArray(), c);
     } else {
-      DumpHeapInstanceObject(obj, c);
+      DumpHeapInstanceObject(obj, c, visitor.GetRoots());
     }
   }
 
@@ -1269,7 +1291,10 @@
 
   // Instance fields for this class (no superclass fields)
   int iFieldCount = klass->NumInstanceFields();
-  if (klass->IsStringClass()) {
+  // add_internal_runtime_objects is only for classes that may retain objects live through means
+  // other than fields. It is never the case for strings.
+  const bool add_internal_runtime_objects = AddRuntimeInternalObjectsField(klass);
+  if (klass->IsStringClass() || add_internal_runtime_objects) {
     __ AddU2((uint16_t)iFieldCount + 1);
   } else {
     __ AddU2((uint16_t)iFieldCount);
@@ -1284,6 +1309,21 @@
   if (klass->IsStringClass()) {
     __ AddStringId(LookupStringId("value"));
     __ AddU1(hprof_basic_object);
+  } else if (add_internal_runtime_objects) {
+    __ AddStringId(LookupStringId("runtimeInternalObjects"));
+    __ AddU1(hprof_basic_object);
+  }
+}
+
+void Hprof::DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) {
+  __ AddU1(HPROF_OBJECT_ARRAY_DUMP);
+  __ AddObjectId(obj);
+  __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj));
+  __ AddU4(elements.size());
+  __ AddClassId(LookupClassId(
+      Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kObjectArrayClass)));
+  for (mirror::Object* e : elements) {
+    __ AddObjectId(e);
   }
 }
 
@@ -1327,7 +1367,9 @@
   }
 }
 
-void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) {
+void Hprof::DumpHeapInstanceObject(mirror::Object* obj,
+                                   mirror::Class* klass,
+                                   const std::set<mirror::Object*>& fake_roots) {
   // obj is an instance object.
   __ AddU1(HPROF_INSTANCE_DUMP);
   __ AddObjectId(obj);
@@ -1341,6 +1383,7 @@
 
   // What we will use for the string value if the object is a string.
   mirror::Object* string_value = nullptr;
+  mirror::Object* fake_object_array = nullptr;
 
   // Write the instance data;  fields for this class, followed by super class fields, and so on.
   do {
@@ -1396,8 +1439,12 @@
         }
       }
       __ AddObjectId(string_value);
+    } else if (AddRuntimeInternalObjectsField(klass)) {
+      // We need an id that is guaranteed to not be used, use 1/2 of the object alignment.
+      fake_object_array = reinterpret_cast<mirror::Object*>(
+          reinterpret_cast<uintptr_t>(obj) + kObjectAlignment / 2);
+      __ AddObjectId(fake_object_array);
     }
-
     klass = klass->GetSuperClass();
   } while (klass != nullptr);
 
@@ -1419,6 +1466,8 @@
       __ AddU1(hprof_basic_char);
       __ AddU2List(s->GetValue(), s->GetLength());
     }
+  } else if (fake_object_array != nullptr) {
+    DumpFakeObjectArray(fake_object_array, fake_roots);
   }
 }
 
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 70be30c..96934bc 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -568,7 +568,7 @@
   // Copy in content.
   memcpy(h_array->GetData(), mem_map->Begin(), map_size);
   // Be proactive releasing memory.
-  mem_map.release();
+  mem_map.reset();
 
   // Create a ByteArrayInputStream.
   Handle<mirror::Class> h_class(hs.NewHandle(
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
index c8d256f..07aeb66 100644
--- a/runtime/jit/profile_saver_options.h
+++ b/runtime/jit/profile_saver_options.h
@@ -20,7 +20,7 @@
 
 struct ProfileSaverOptions {
  public:
-  static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
+  static constexpr uint32_t kMinSavePeriodMs = 40 * 1000;  // 40 seconds
   static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000;  // 5 seconds
   // Minimum number of JIT samples during launch to include a method into the profile.
   static constexpr uint32_t kStartupMethodSamples = 1;
diff --git a/runtime/oat.h b/runtime/oat.h
index 05706252..9b2227b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '9', '\0' };  // Add thread_local_limit.
+  // Revert concurrent graying for immune spaces.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 2c2b6fd..eafa77f 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -849,24 +849,24 @@
     return kNoDexOptNeeded;
   }
 
-  if (oat_file_assistant_->HasOriginalDexFiles()) {
-    if (filter_okay && Status() == kOatRelocationOutOfDate) {
-      return kDex2OatForRelocation;
-    }
-
-    if (IsUseable()) {
-      return kDex2OatForFilter;
-    }
-
-    if (Status() == kOatBootImageOutOfDate) {
-      return kDex2OatForBootImage;
-    }
-
-    return kDex2OatFromScratch;
+  if (filter_okay && Status() == kOatRelocationOutOfDate) {
+    return kDex2OatForRelocation;
   }
 
-  // Otherwise there is nothing we can do, even if we want to.
-  return kNoDexOptNeeded;
+  if (IsUseable()) {
+    return kDex2OatForFilter;
+  }
+
+  if (Status() == kOatBootImageOutOfDate) {
+    return kDex2OatForBootImage;
+  }
+
+  if (oat_file_assistant_->HasOriginalDexFiles()) {
+    return kDex2OatFromScratch;
+  } else {
+    // Otherwise there is nothing we can do, even if we want to.
+    return kNoDexOptNeeded;
+  }
 }
 
 const OatFile* OatFileAssistant::OatFileInfo::GetFile() {
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index 198f8e6..18924e9 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -526,7 +526,7 @@
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract));
   EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed));
-  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded,  // Can't run dex2oat because dex file is stripped.
+  EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter,  // Compiling from the .vdex file
       oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything));
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 8ffd8bb..fc91efa 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -238,6 +238,9 @@
       .Define("-Xlockprofthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::LockProfThreshold)
+      .Define("-Xstacktracedir:_")
+          .WithType<std::string>()
+          .IntoKey(M::StackTraceDir)
       .Define("-Xstacktracefile:_")
           .WithType<std::string>()
           .IntoKey(M::StackTraceFile)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7afbe72..b1acec6 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -336,6 +336,16 @@
     jit_->DeleteThreadPool();
   }
 
+  // Make sure our internal threads are dead before we start tearing down things they're using.
+  Dbg::StopJdwp();
+  delete signal_catcher_;
+
+  // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
+  {
+    ScopedTrace trace2("Delete thread list");
+    thread_list_->ShutDown();
+  }
+
   // TODO Maybe do some locking.
   for (auto& agent : agents_) {
     agent.Unload();
@@ -346,15 +356,9 @@
     plugin.Unload();
   }
 
-  // Make sure our internal threads are dead before we start tearing down things they're using.
-  Dbg::StopJdwp();
-  delete signal_catcher_;
+  // Finally delete the thread list.
+  delete thread_list_;
 
-  // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
-  {
-    ScopedTrace trace2("Delete thread list");
-    delete thread_list_;
-  }
   // Delete the JIT after thread list to ensure that there is no remaining threads which could be
   // accessing the instrumentation when we delete it.
   if (jit_ != nullptr) {
@@ -826,7 +830,7 @@
 
 void Runtime::StartSignalCatcher() {
   if (!is_zygote_) {
-    signal_catcher_ = new SignalCatcher(stack_trace_file_);
+    signal_catcher_ = new SignalCatcher(stack_trace_dir_, stack_trace_file_);
   }
 }
 
@@ -1037,6 +1041,7 @@
   abort_ = runtime_options.GetOrDefault(Opt::HookAbort);
 
   default_stack_size_ = runtime_options.GetOrDefault(Opt::StackSize);
+  stack_trace_dir_ = runtime_options.ReleaseOrDefault(Opt::StackTraceDir);
   stack_trace_file_ = runtime_options.ReleaseOrDefault(Opt::StackTraceFile);
 
   compiler_executable_ = runtime_options.ReleaseOrDefault(Opt::Compiler);
@@ -1962,12 +1967,23 @@
   }
 }
 
+void Runtime::ClearInstructionSet() {
+  instruction_set_ = InstructionSet::kNone;
+}
+
 void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) {
   DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType));
   CHECK(method != nullptr);
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
+void Runtime::ClearCalleeSaveMethods() {
+  for (size_t i = 0; i < static_cast<size_t>(kLastCalleeSaveType); ++i) {
+    CalleeSaveType type = static_cast<CalleeSaveType>(i);
+    callee_save_methods_[type] = reinterpret_cast<uintptr_t>(nullptr);
+  }
+}
+
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
                               const std::string& profile_output_filename) {
   if (jit_.get() == nullptr) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index b91cb0c..3ba0f2c 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -356,6 +356,9 @@
   }
 
   void SetResolutionMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ClearResolutionMethod() {
+    resolution_method_ = nullptr;
+  }
 
   ArtMethod* CreateResolutionMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -367,6 +370,10 @@
     return imt_conflict_method_ != nullptr;
   }
 
+  void ClearImtConflictMethod() {
+    imt_conflict_method_ = nullptr;
+  }
+
   void FixupConflictTables();
   void SetImtConflictMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
   void SetImtUnimplementedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -374,6 +381,10 @@
   ArtMethod* CreateImtConflictMethod(LinearAlloc* linear_alloc)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void ClearImtUnimplementedMethod() {
+    imt_unimplemented_method_ = nullptr;
+  }
+
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
     kSaveAllCalleeSaves,  // All callee-save registers.
@@ -409,8 +420,10 @@
   }
 
   void SetInstructionSet(InstructionSet instruction_set);
+  void ClearInstructionSet();
 
   void SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type);
+  void ClearCalleeSaveMethods();
 
   ArtMethod* CreateCalleeSaveMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -771,6 +784,7 @@
   ClassLinker* class_linker_;
 
   SignalCatcher* signal_catcher_;
+  std::string stack_trace_dir_;
   std::string stack_trace_file_;
 
   std::unique_ptr<JavaVMExt> java_vm_;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 16190cd..77132a8 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -100,6 +100,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
+RUNTIME_OPTIONS_KEY (std::string,         StackTraceDir)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 0b7ea2f..3826433 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -27,6 +27,7 @@
 
 #include <sstream>
 
+#include "android-base/stringprintf.h"
 #include "arch/instruction_set.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
@@ -65,8 +66,10 @@
 #endif
 }
 
-SignalCatcher::SignalCatcher(const std::string& stack_trace_file)
-    : stack_trace_file_(stack_trace_file),
+SignalCatcher::SignalCatcher(const std::string& stack_trace_dir,
+                             const std::string& stack_trace_file)
+    : stack_trace_dir_(stack_trace_dir),
+      stack_trace_file_(stack_trace_file),
       lock_("SignalCatcher lock"),
       cond_("SignalCatcher::cond_", lock_),
       thread_(nullptr) {
@@ -100,19 +103,51 @@
   return halt_;
 }
 
+std::string SignalCatcher::GetStackTraceFileName() {
+  if (!stack_trace_dir_.empty()) {
+    // We'll try a maximum of ten times (arbitrarily selected) to create a file
+    // with a unique name, seeding the pseudo random generator each time.
+    //
+    // If this doesn't work, give up and log to stdout. Note that we could try
+    // indefinitely, but that would make problems in this code harder to detect
+    // since we'd be spinning in the signal catcher thread.
+    static constexpr uint32_t kMaxRetries = 10;
+
+    for (uint32_t i = 0; i < kMaxRetries; ++i) {
+        std::srand(NanoTime());
+        // Sample output for PID 1234 : /data/anr-pid1234-cafeffee.txt
+        const std::string file_name = android::base::StringPrintf(
+            "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt",
+            stack_trace_dir_.c_str(),
+            static_cast<int32_t>(getpid()),
+            static_cast<uint32_t>(std::rand()));
+
+        if (!OS::FileExists(file_name.c_str())) {
+          return file_name;
+        }
+    }
+
+    LOG(ERROR) << "Unable to obtain stack trace filename at path : " << stack_trace_dir_;
+    return "";
+  }
+
+  return stack_trace_file_;
+}
+
 void SignalCatcher::Output(const std::string& s) {
-  if (stack_trace_file_.empty()) {
+  const std::string stack_trace_file = GetStackTraceFileName();
+  if (stack_trace_file.empty()) {
     LOG(INFO) << s;
     return;
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
-  int fd = open(stack_trace_file_.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
+  int fd = open(stack_trace_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
   if (fd == -1) {
     PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
     return;
   }
-  std::unique_ptr<File> file(new File(fd, stack_trace_file_, true));
+  std::unique_ptr<File> file(new File(fd, stack_trace_file, true));
   bool success = file->WriteFully(s.data(), s.size());
   if (success) {
     success = file->FlushCloseOrErase() == 0;
@@ -120,9 +155,9 @@
     file->Erase();
   }
   if (success) {
-    LOG(INFO) << "Wrote stack traces to '" << stack_trace_file_ << "'";
+    LOG(INFO) << "Wrote stack traces to '" << stack_trace_file << "'";
   } else {
-    PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file_ << "'";
+    PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file << "'";
   }
 }
 
diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h
index de6a212..4cd7a98 100644
--- a/runtime/signal_catcher.h
+++ b/runtime/signal_catcher.h
@@ -32,7 +32,15 @@
  */
 class SignalCatcher {
  public:
-  explicit SignalCatcher(const std::string& stack_trace_file);
+  // If |stack_trace_dir| is non empty, traces will be written to a
+  // unique file under that directory.
+  //
+  // If |stack_trace_dir| is empty, and |stack_frace_file| is non-empty,
+  // traces will be appended to |stack_trace_file|.
+  //
+  // If both are empty, all traces will be written to the log buffer.
+  explicit SignalCatcher(const std::string& stack_trace_dir,
+                         const std::string& stack_trace_file);
   ~SignalCatcher();
 
   void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
@@ -43,12 +51,14 @@
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* Run(void* arg) NO_THREAD_SAFETY_ANALYSIS;
 
+  std::string GetStackTraceFileName();
   void HandleSigUsr1();
   void Output(const std::string& s);
   void SetHaltFlag(bool new_value) REQUIRES(!lock_);
   bool ShouldHalt() REQUIRES(!lock_);
   int WaitForSignal(Thread* self, SignalSet& signals) REQUIRES(!lock_);
 
+  std::string stack_trace_dir_;
   std::string stack_trace_file_;
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/string_reference.h b/runtime/string_reference.h
index 0fc06e6..6ba4773 100644
--- a/runtime/string_reference.h
+++ b/runtime/string_reference.h
@@ -41,7 +41,7 @@
 
 // Compare only the reference and not the string contents.
 struct StringReferenceComparator {
-  bool operator()(const StringReference& a, const StringReference& b) {
+  bool operator()(const StringReference& a, const StringReference& b) const {
     if (a.dex_file != b.dex_file) {
       return a.dex_file < b.dex_file;
     }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 201701a..62a616b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -35,6 +35,7 @@
 #include "android-base/stringprintf.h"
 
 #include "arch/context.h"
+#include "arch/context-inl.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/bit_utils.h"
@@ -3413,11 +3414,10 @@
     verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
-  Context* context = GetLongJumpContext();
+  RuntimeContextType context;
   RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, context, visitor_to_callback);
+  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, &context, visitor_to_callback);
   mapper.template WalkStack<StackVisitor::CountTransitions::kNo>(false);
-  ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
   }
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 2e0d866..d7f9ce3 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -73,12 +73,17 @@
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
       long_suspend_(false),
+      shut_down_(false),
       thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
       empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
 ThreadList::~ThreadList() {
+  CHECK(shut_down_);
+}
+
+void ThreadList::ShutDown() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
   // We need to detach the current thread here in case there's another thread waiting to join with
@@ -102,6 +107,8 @@
   // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
   //       Thread::Init.
   SuspendAllDaemonThreadsForShutdown();
+
+  shut_down_ = true;
 }
 
 bool ThreadList::Contains(Thread* thread) {
@@ -1362,6 +1369,7 @@
 
 void ThreadList::Register(Thread* self) {
   DCHECK_EQ(self, Thread::Current());
+  CHECK(!shut_down_);
 
   if (VLOG_IS_ON(threads)) {
     std::ostringstream oss;
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 70917eb..14bef5e 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -50,6 +50,8 @@
   explicit ThreadList(uint64_t thread_suspend_timeout_ns);
   ~ThreadList();
 
+  void ShutDown();
+
   void DumpForSigQuit(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);
   // For thread suspend timeout dumps.
@@ -219,6 +221,10 @@
   // Whether or not the current thread suspension is long.
   bool long_suspend_;
 
+  // Whether the shutdown function has been called. This is checked in the destructor. It is an
+  // error to destroy a ThreadList instance without first calling ShutDown().
+  bool shut_down_;
+
   // Thread suspension timeout in nanoseconds.
   const uint64_t thread_suspend_timeout_ns_;
 
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index 0e69a96..71167c1 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -94,9 +94,7 @@
             /* spin for a bit */
             long start, end;
             start = System.nanoTime();
-            for (int i = 0; i < 1000000; i++) {
-                j++;
-            }
+            snooze(2000);
             end = System.nanoTime();
             System.out.println("Finalizer done spinning.");
 
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 330aa74..a538f52 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -17,8 +17,8 @@
 // TODO: Add more tests after we can inline functions with calls.
 
 class ClassWithoutFinals {
-  /// CHECK-START: void ClassWithoutFinals.<init>() register (after)
-  /// CHECK-NOT: MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithoutFinals.<init>() inliner (after)
+  /// CHECK-NOT: ConstructorFence
   public ClassWithoutFinals() {}
 }
 
@@ -33,17 +33,40 @@
     // should not inline this constructor
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
+
+  /*
+   * Check that the correct assembly instructions are selected for a Store/Store fence.
+   *
+   * - ARM variants:   DMB ISHST (store-store fence for inner shareable domain)
+   * - Intel variants: no-op (store-store does not need a fence).
+   */
+
+  /// CHECK-START-ARM64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-ARM: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-X86_64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
+
+  /// CHECK-START-X86: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
   public ClassWithFinals() {
     // Exactly one constructor barrier.
     x = 0;
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public ClassWithFinals(int x) {
     // This should have exactly two barriers:
@@ -55,11 +78,11 @@
 }
 
 class InheritFromClassWithFinals extends ClassWithFinals {
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals() {
     // Should inline the super constructor.
@@ -67,23 +90,23 @@
     // Exactly one constructor barrier here.
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public InheritFromClassWithFinals(boolean cond) {
     super(cond);
     // should not inline the super constructor
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  ConstructorFence
   /// CHECK:      ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals(int unused) {
     // Should inline the super constructor and insert a memory barrier.
@@ -96,21 +119,21 @@
 class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals {
   final int y;
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT: InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals() {
     // Should inline the super constructor and keep the memory barrier.
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public HaveFinalsAndInheritFromClassWithFinals(boolean cond) {
     super(cond);
@@ -118,15 +141,15 @@
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals(int unused) {
     // Should inline the super constructor and keep keep both memory barriers.
@@ -141,55 +164,55 @@
 
 public class Main {
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public static ClassWithFinals noInlineNoConstructorBarrier() {
     return new ClassWithFinals(false);
     // should not inline the constructor
   }
 
-  /// CHECK-START: void Main.inlineNew() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew() register (after)
+  /// CHECK-START: void Main.inlineNew() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew() {
     new ClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew1() {
     new InheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew2() {
     new HaveFinalsAndInheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew3() {
     new HaveFinalsAndInheritFromClassWithFinals();
diff --git a/test/530-checker-lse-ctor-fences/expected.txt b/test/530-checker-lse-ctor-fences/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/expected.txt
diff --git a/test/530-checker-lse-ctor-fences/info.txt b/test/530-checker-lse-ctor-fences/info.txt
new file mode 100644
index 0000000..ccc7b47
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/info.txt
@@ -0,0 +1 @@
+Checker test for testing load-store elimination with final fields (constructor fences).
diff --git a/test/530-checker-lse-ctor-fences/src/Main.java b/test/530-checker-lse-ctor-fences/src/Main.java
new file mode 100644
index 0000000..7755875
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/src/Main.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This base class has a single final field;
+// the constructor should have one fence.
+class Circle {
+  Circle(double radius) {
+    this.radius = radius;
+  }
+  public double getRadius() {
+    return radius;
+  }
+  public double getArea() {
+    return radius * radius * Math.PI;
+  }
+
+  public double getCircumference() {
+    return 2 * Math.PI * radius;
+  }
+
+  private final double radius;
+}
+
+// This subclass adds an extra final field;
+// there should be an extra constructor fence added
+// (for a total of 2 after inlining).
+class Ellipse extends Circle {
+  Ellipse(double vertex, double covertex) {
+    super(vertex);
+
+    this.covertex = covertex;
+  }
+
+  public double getVertex() {
+    return getRadius();
+  }
+
+  public double getCovertex() {
+    return covertex;
+  }
+
+  @Override
+  public double getArea() {
+    return getRadius() * covertex * Math.PI;
+  }
+
+  private final double covertex;
+}
+
+class CalcCircleAreaOrCircumference {
+  public static final int TYPE_AREA = 0;
+  public static final int TYPE_CIRCUMFERENCE = 1;
+
+  double value;
+
+  public CalcCircleAreaOrCircumference(int type) {
+    this.type = type;
+  }
+
+  final int type;
+}
+
+public class Main {
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Make sure the constructor fence gets eliminated when the allocation is eliminated.
+  static double calcCircleArea(double radius) {
+    return new Circle(radius).getArea();
+  }
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Multiple constructor fences can accumulate through inheritance, make sure
+  // they are all eliminated when the allocation is eliminated.
+  static double calcEllipseArea(double vertex, double covertex) {
+    return new Ellipse(vertex, covertex).getArea();
+  }
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: ConstructorFence
+
+  //
+  // The object allocation will not be eliminated by LSE because of aliased stores.
+  // However the object is still a singleton, so it never escapes the current thread.
+  // There should not be a constructor fence here after LSE.
+  static double calcCircleAreaOrCircumference(double radius, boolean area_or_circumference) {
+    CalcCircleAreaOrCircumference calc =
+      new CalcCircleAreaOrCircumference(
+          area_or_circumference ? CalcCircleAreaOrCircumference.TYPE_AREA :
+          CalcCircleAreaOrCircumference.TYPE_CIRCUMFERENCE);
+
+    if (area_or_circumference) {
+      // Area
+      calc.value = Math.PI * Math.PI * radius;
+    } else {
+      // Circumference
+      calc.value = 2 * Math.PI * radius;
+    }
+
+    return calc.value;
+  }
+
+  /// CHECK-START: Circle Main.makeCircle(double) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: ConstructorFence
+
+  // The object allocation is considered a singleton by LSE,
+  // but we cannot eliminate the new because it is returned.
+  //
+  // The constructor fence must also not be removed because the object could escape the
+  // current thread (in the caller).
+  static Circle makeCircle(double radius) {
+    return new Circle(radius);
+  }
+
+  static void assertIntEquals(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertFloatEquals(float result, float expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertDoubleEquals(double result, double expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertInstanceOf(Object result, Class<?> expected) {
+    if (result.getClass() != expected) {
+      throw new Error("Expected type: " + expected + ", found : " + result.getClass());
+    }
+  }
+
+  public static void main(String[] args) {
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI));
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcEllipseArea(Math.PI, Math.PI));
+    assertDoubleEquals(2 * Math.PI * Math.PI, calcCircleAreaOrCircumference(Math.PI, false));
+    assertInstanceOf(makeCircle(Math.PI), Circle.class);
+  }
+
+  static boolean sFlag;
+}
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
index 0fe3d87..491a9a1 100644
--- a/test/530-checker-lse2/src/Main.java
+++ b/test/530-checker-lse2/src/Main.java
@@ -76,16 +76,27 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
@@ -95,9 +106,14 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: ConstructorFence
 
   private float testMethod() {
     {
+      // Each of the "new" statements here will initialize an object with final fields,
+      // which after inlining will also retain a constructor fence.
+      //
+      // After LSE we remove the 'new-instance' and the associated constructor fence.
       int lI0 = (-1456058746 << mI);
       mD = ((double)(int)(double) mD);
       for (int i0 = 56 - 1; i0 >= 0; i0--) {
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 345e9fd..26d87b1 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -331,7 +331,7 @@
 
   /// CHECK-START: double Main.constructBase() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBase() {
@@ -347,7 +347,7 @@
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -371,7 +371,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0() {
@@ -387,7 +387,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
@@ -411,7 +411,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -431,7 +431,7 @@
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -460,7 +460,7 @@
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -493,7 +493,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -543,7 +543,7 @@
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -567,7 +567,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0d() {
@@ -605,7 +605,7 @@
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -628,7 +628,7 @@
 
   /// CHECK-START: double Main.constructDerived() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerived() {
@@ -644,7 +644,7 @@
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -668,7 +668,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0() {
@@ -684,7 +684,7 @@
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -702,7 +702,7 @@
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -726,7 +726,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0d() {
@@ -744,7 +744,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -794,7 +794,7 @@
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-DAG:  <<Value:f\d+>>      ParameterValue
@@ -821,7 +821,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -852,7 +852,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalField() {
@@ -873,7 +873,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -892,7 +892,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalFieldWith0() {
@@ -907,7 +907,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalField() {
@@ -928,7 +928,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -947,7 +947,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0() {
@@ -968,7 +968,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -987,7 +987,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0d() {
@@ -1009,7 +1009,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -1017,8 +1017,8 @@
   /// CHECK-NOT:                      InstanceFieldSet
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
-  /// CHECK-DAG:                      MemoryBarrier
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
+  /// CHECK-NOT:                      ConstructorFence
 
   public static double constructDerivedWithFinalField(int intValue, double doubleValue) {
     DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue);
@@ -1034,7 +1034,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0And0d() {
@@ -1049,7 +1049,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex() {
@@ -1070,7 +1070,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(int intValue) {
@@ -1091,7 +1091,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDexWith0() {
@@ -1107,7 +1107,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(long dummy) {
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 6efc168..d1f36ed 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -280,7 +280,17 @@
     }
   }
 
-  // If vectorized, string encoding should be dealt with.
+  /// CHECK-START: void Main.string2Bytes(char[], java.lang.String) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.string2Bytes(char[], java.lang.String) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  // NOTE: should correctly deal with compressed and uncompressed cases.
   private static void string2Bytes(char[] a, String b) {
     int min = Math.min(a.length, b.length());
     for (int i = 0; i < min; i++) {
@@ -411,6 +421,11 @@
     for (int i = 0; i < aa.length; i++) {
       expectEquals(aa[i], bb.charAt(i));
     }
+    String cc = "\u1010\u2020llo world how are y\u3030\u4040";
+    string2Bytes(aa, cc);
+    for (int i = 0; i < aa.length; i++) {
+      expectEquals(aa[i], cc.charAt(i));
+    }
 
     envUsesInCond();
 
diff --git a/test/648-many-direct-methods/build b/test/648-many-direct-methods/build
new file mode 100755
index 0000000..7e888e5
--- /dev/null
+++ b/test/648-many-direct-methods/build
@@ -0,0 +1,25 @@
+#! /bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Exit on a failure.
+set -e
+
+mkdir -p ./src
+
+# Generate the Java file or fail.
+./util-src/generate_java.py ./src
+
+./default-build "$@"
diff --git a/test/648-many-direct-methods/expected.txt b/test/648-many-direct-methods/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/648-many-direct-methods/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/648-many-direct-methods/info.txt b/test/648-many-direct-methods/info.txt
new file mode 100644
index 0000000..a65ab80
--- /dev/null
+++ b/test/648-many-direct-methods/info.txt
@@ -0,0 +1,2 @@
+Regression test checking that the runtime can handle a huge number of
+direct methods (b/33650497).
diff --git a/test/648-many-direct-methods/util-src/generate_java.py b/test/648-many-direct-methods/util-src/generate_java.py
new file mode 100755
index 0000000..6cae868
--- /dev/null
+++ b/test/648-many-direct-methods/util-src/generate_java.py
@@ -0,0 +1,137 @@
+#! /usr/bin/python3
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Generate Java test files for test 648-many-direct-methods.
+"""
+
+import os
+import sys
+from pathlib import Path
+
+BUILD_TOP = os.getenv("ANDROID_BUILD_TOP")
+if BUILD_TOP is None:
+  print("ANDROID_BUILD_TOP not set. Please run build/envsetup.sh", file=sys.stderr)
+  sys.exit(1)
+
+# Allow us to import utils and mixins.
+sys.path.append(str(Path(BUILD_TOP)/"art"/"test"/"utils"/"python"))
+
+from testgen.utils import get_copyright, subtree_sizes, gensym, filter_blanks
+import testgen.mixins as mixins
+
+class MainClass(mixins.DumpMixin, mixins.Named, mixins.JavaFileMixin):
+  """
+  A Main.java file containing the Main class and the main function. It will run
+  all the test functions we have.
+  """
+
+  MAIN_CLASS_TEMPLATE = """{copyright}
+public class Main {{
+{main_func}
+{test_groups}
+
+}}"""
+
+  MAIN_FUNCTION_TEMPLATE = """
+  public static void main(String[] args) {
+    System.out.println("passed");
+  }"""
+
+  def __init__(self):
+    """
+    Initialize this MainClass. We start out with no tests.
+    """
+    self.tests = set()
+
+  def add_test_method(self, num):
+    """
+    Add test method number 'num'
+    """
+    self.tests.add(TestMethod(num))
+
+  def get_name(self):
+    """
+    Get the name of this class
+    """
+    return "Main"
+
+  def __str__(self):
+    """
+    Print the MainClass Java code.
+    """
+    all_tests = sorted(self.tests)
+    test_groups = ""
+    for t in all_tests:
+      test_groups += str(t)
+    main_func = self.MAIN_FUNCTION_TEMPLATE
+
+    return self.MAIN_CLASS_TEMPLATE.format(copyright = get_copyright("java"),
+                                           main_func = main_func,
+                                           test_groups = test_groups)
+
+class TestMethod(mixins.Named, mixins.NameComparableMixin):
+  """
+  A function that represents a test method. Should only be
+  constructed by MainClass.add_test_method.
+  """
+
+  TEST_FUNCTION_TEMPLATE = """
+  public static void {fname}() {{}}"""
+
+  def __init__(self, farg):
+    """
+    Initialize a test method for the given argument.
+    """
+    self.farg = farg
+
+  def get_name(self):
+    """
+    Get the name of this test method.
+    """
+    return "method{:05d}".format(self.farg)
+
+  def __str__(self):
+    """
+    Print the Java code of this test method.
+    """
+    return self.TEST_FUNCTION_TEMPLATE.format(fname=self.get_name())
+
+# Number of generated test methods. This number has been chosen to
+# make sure the number of direct methods in class Main is greater or
+# equal to 2^16, and thus requires an *unsigned* 16-bit (short)
+# integer to be represented (b/33650497).
+NUM_TEST_METHODS = 32768
+
+def create_test_file():
+  """
+  Creates the object representing the test file. It just needs to be dumped.
+  """
+  mc = MainClass()
+  for i in range(1, NUM_TEST_METHODS + 1):
+    mc.add_test_method(i)
+  return mc
+
+def main(argv):
+  java_dir = Path(argv[1])
+  if not java_dir.exists() or not java_dir.is_dir():
+    print("{} is not a valid Java dir".format(java_dir), file=sys.stderr)
+    sys.exit(1)
+  mainclass = create_test_file()
+  mainclass.dump(java_dir)
+
+if __name__ == '__main__':
+  main(sys.argv)
diff --git a/test/987-stack-trace-dumping/expected.txt b/test/987-stack-trace-dumping/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/987-stack-trace-dumping/expected.txt
diff --git a/test/987-stack-trace-dumping/info.txt b/test/987-stack-trace-dumping/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/987-stack-trace-dumping/info.txt
diff --git a/test/987-stack-trace-dumping/run b/test/987-stack-trace-dumping/run
new file mode 100755
index 0000000..dee3e8b
--- /dev/null
+++ b/test/987-stack-trace-dumping/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ask for stack traces to be dumped to a file rather than to stdout.
+./default-run "$@" --set-stack-trace-dump-dir
diff --git a/test/987-stack-trace-dumping/src/Main.java b/test/987-stack-trace-dumping/src/Main.java
new file mode 100644
index 0000000..d1e8a1b
--- /dev/null
+++ b/test/987-stack-trace-dumping/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+        if (args.length != 3) {
+            throw new AssertionError("Unexpected number of args: " + args.length);
+        }
+
+        if (!"--stack-trace-dir".equals(args[1])) {
+            throw new AssertionError("Unexpected argument in position 1: " + args[1]);
+        }
+
+        // Send ourselves signal 3, which forces stack traces to be written to disk.
+        android.system.Os.kill(android.system.Os.getpid(), 3);
+
+        File[] files = null;
+        final String stackTraceDir = args[2];
+        for (int i = 0; i < 5; ++i) {
+            // Give the signal handler some time to run and dump traces - up to a maximum
+            // of 5 seconds. This is a kludge, but it's hard to do this without using things
+            // like inotify / WatchService and the like.
+            Thread.sleep(1000);
+
+            files = (new File(stackTraceDir)).listFiles();
+            if (files != null && files.length == 1) {
+                break;
+            }
+        }
+
+
+        if (files == null) {
+            throw new AssertionError("Gave up waiting for traces: " + java.util.Arrays.toString(files));
+        }
+
+        final String fileName = files[0].getName();
+        if (!fileName.startsWith("anr-pid")) {
+            throw new AssertionError("Unexpected prefix: " + fileName);
+        }
+
+        if (!fileName.contains(String.valueOf(android.system.Os.getpid()))) {
+            throw new AssertionError("File name does not contain process PID: " + fileName);
+        }
+    }
+}
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 73aecf5..95908d9 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -67,6 +67,11 @@
 PROFILE="n"
 RANDOM_PROFILE="n"
 
+# if "y", set -Xstacktracedir and inform the test of its location. When
+# this is set, stack trace dumps (from signal 3) will be written to a file
+# under this directory instead of stdout.
+SET_STACK_TRACE_DUMP_DIR="n"
+
 # if "y", run 'sync' before dalvikvm to make sure all files from
 # build step (e.g. dex2oat) were finished writing.
 SYNC_BEFORE_RUN="n"
@@ -283,6 +288,9 @@
     elif [ "x$1" = "x--random-profile" ]; then
         RANDOM_PROFILE="y"
         shift
+    elif [ "x$1" = "x--set-stack-trace-dump-dir" ]; then
+        SET_STACK_TRACE_DUMP_DIR="y"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -291,12 +299,22 @@
     fi
 done
 
+mkdir_locations=""
+
 if [ "$USE_JVM" = "n" ]; then
     FLAGS="${FLAGS} ${ANDROID_FLAGS}"
     for feature in ${EXPERIMENTAL}; do
         FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}"
         COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}"
     done
+
+    if [ "$SET_STACK_TRACE_DUMP_DIR" = "y" ]; then
+        # Note that DEX_LOCATION is used as a proxy for tmpdir throughout this
+        # file (it will be under the test specific folder).
+        mkdir_locations="${mkdir_locations} $DEX_LOCATION/stack_traces"
+        FLAGS="${FLAGS} -Xstacktracedir:$DEX_LOCATION/stack_traces"
+        ARGS="${ARGS} --stack-trace-dir $DEX_LOCATION/stack_traces"
+    fi
 fi
 
 if [ "x$1" = "x" ] ; then
@@ -536,7 +554,7 @@
 profman_cmdline="true"
 dex2oat_cmdline="true"
 vdex_cmdline="true"
-mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
+mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 sync_cmdline="true"
 
@@ -569,7 +587,11 @@
     app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art"
   fi
 
-  dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \
+  dex2oat_binary=dex2oatd
+  if  [[ "$TEST_IS_NDEBUG" = "y" ]]; then
+    dex2oat_binary=dex2oat
+  fi
+  dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/$dex2oat_binary \
                       $COMPILE_FLAGS \
                       --boot-image=${BOOT_IMAGE} \
                       --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 0e42a29..ea810db 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -108,20 +108,19 @@
                         "non-deterministic. Same for 913."]
     },
     {
-        "tests": "961-default-iface-resolution-gen",
+        "tests": ["961-default-iface-resolution-gen",
+                  "964-default-iface-init-gen",
+                  "968-default-partial-compile-gen"],
         "variant": "gcstress",
-        "description": ["961-default-iface-resolution-gen and",
-                        "964-default-iface-init-genare very long tests that",
+        "description": ["961-default-iface-resolution-gen,",
+                        "968-default-partial-compile-gen and",
+                        "964-default-iface-init-gen are very long tests that",
                         "often will take more than the timeout to run when",
                         "gcstress is enabled. This is because gcstress slows",
                         "down allocations significantly which these tests do a",
                         "lot."]
     },
     {
-        "tests": "964-default-iface-init-gen",
-        "variant": "gcstress"
-    },
-    {
         "tests": "154-gc-loop",
         "variant": "gcstress | jit & debug",
         "description": ["154-gc-loop depends GC not happening too often"],
@@ -602,5 +601,86 @@
         ],
         "bug": "b/37239009",
         "variant": "jvmti-stress"
+    },
+    {
+        "tests": [
+            "004-JniTest",
+            "004-NativeAllocations",
+            "004-ReferenceMap",
+            "004-StackWalk",
+            "048-reflect-v8",
+            "089-many-methods",
+            "138-duplicate-classes-check",
+            "146-bad-interface",
+            "157-void-class",
+            "563-checker-invoke-super",
+            "580-checker-string-fact-intrinsics",
+            "596-monitor-inflation",
+            "604-hot-static-interface",
+            "612-jit-dex-cache",
+            "613-inlining-dex-cache",
+            "616-cha-interface-default",
+            "636-wrong-static-access",
+            "909-attach-agent",
+            "910-methods",
+            "911-get-stack-trace",
+            "912-classes",
+            "913-heaps",
+            "914-hello-obsolescence",
+            "915-obsolete-2",
+            "916-obsolete-jit",
+            "919-obsolete-fields",
+            "921-hello-failure",
+            "926-multi-obsolescence",
+            "940-recursive-obsolete",
+            "941-recurive-obsolete-jit",
+            "942-private-recursive",
+            "943-private-recursive-jit",
+            "945-obsolete-native",
+            "946-obsolete-throw",
+            "948-change-annotations",
+            "950-redefine-intrinsic",
+            "951-threaded-obsolete",
+            "952-invoke-custom",
+            "953-invoke-polymorphic-compiler",
+            "956-methodhandles",
+            "957-methodhandle-transforms",
+            "958-methodhandle-stackframe",
+            "959-invoke-polymorphic-accessors",
+            "960-default-smali",
+            "961-default-iface-resolution-gen",
+            "962-iface-static",
+            "963-default-range-smali",
+            "964-default-iface-init-gen",
+            "965-default-verify",
+            "966-default-conflict",
+            "967-default-ame",
+            "969-iface-super",
+            "981-dedup-original-dex",
+            "984-obsolete-invoke",
+            "985-re-obsolete",
+            "987-stack-trace-dumping"
+        ],
+        "description": "The tests above fail with --build-with-javac-dx.",
+        "env_vars": {"ANDROID_COMPILE_WITH_JACK": "false"},
+        "bug": "b/37636792"
+    },
+    {
+        "tests": [
+            "536-checker-needs-access-check",
+            "537-checker-inline-and-unverified",
+            "569-checker-pattern-replacement",
+            "586-checker-null-array-get"
+        ],
+        "description": [
+            "Tests that have verify-at-runtime classes, but being compiled when using vdex."
+        ],
+        "variant": "speed-profile"
+    },
+    {
+        "tests": "648-many-direct-methods",
+        "variant": "debug",
+        "description": "Test disabled in debug mode because of dex2oatd timeouts.",
+        "bug": "b/33650497"
     }
 ]
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index 7d9297f..a0c4ea8 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -95,6 +95,9 @@
 
 ANDROID_BUILD_TOP = _get_android_build_top()
 
+# Compiling with jack? Possible values in (True, False, 'default')
+ANDROID_COMPILE_WITH_JACK = _getEnvBoolean('ANDROID_COMPILE_WITH_JACK', 'default')
+
 # Directory used for temporary test files on the host.
 ART_HOST_TEST_DIR = tempfile.mkdtemp(prefix = 'test-art-')
 
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index 8072631..a809246 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -450,6 +450,12 @@
           options_test += ' --instruction-set-features ' + \
                           env.HOST_2ND_ARCH_PREFIX_DEX2OAT_HOST_INSTRUCTION_SET_FEATURES
 
+      # Use the default run-test behavior unless ANDROID_COMPILE_WITH_JACK is explicitly set.
+      if env.ANDROID_COMPILE_WITH_JACK == True:
+        options_test += ' --build-with-jack'
+      elif env.ANDROID_COMPILE_WITH_JACK == False:
+        options_test += ' --build-with-javac-dx'
+
       # TODO(http://36039166): This is a temporary solution to
       # fix build breakages.
       options_test = (' --output-path %s') % (
diff --git a/tools/cpp-define-generator/constant_card_table.def b/tools/cpp-define-generator/constant_card_table.def
new file mode 100644
index 0000000..ae3e8f3
--- /dev/null
+++ b/tools/cpp-define-generator/constant_card_table.def
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Export heap values.
+
+#if defined(DEFINE_INCLUDE_DEPENDENCIES)
+#include "gc/accounting/card_table.h"
+#endif
+
+// Size of references to the heap on the stack.
+DEFINE_EXPR(CARD_TABLE_CARD_SHIFT, size_t, art::gc::accounting::CardTable::kCardShift)
+
diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def
index 13371a1..b8947de 100644
--- a/tools/cpp-define-generator/offsets_all.def
+++ b/tools/cpp-define-generator/offsets_all.def
@@ -49,6 +49,7 @@
 // TODO: MIRROR_STRING offsets (depends on header size)
 #include "offset_dexcache.def"
 #include "constant_dexcache.def"
+#include "constant_card_table.def"
 #include "constant_heap.def"
 #include "constant_lockword.def"
 #include "constant_globals.def"