Merge "MIPS32: Improve integer division by constants"
diff --git a/Android.mk b/Android.mk
index 0d0003a..3438beb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -146,7 +146,10 @@
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
 	adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT)
-	adb push $(ANDROID_PRODUCT_OUT)/data /data
+# Push the contents of the `data` dir into `/data` on the device.  If
+# `/data` already exists on the device, it is not overwritten, but its
+# contents are updated.
+	adb push $(ANDROID_PRODUCT_OUT)/data /
 endif
 endif
 
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index cd9d18d..a93d8a8 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -118,7 +118,8 @@
 ART_TARGET_CLANG_arm := false
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
-ART_TARGET_CLANG_mips64 :=
+# b/25928358, illegal instruction on mips64r6 with -O0
+ART_TARGET_CLANG_mips64 := false
 ART_TARGET_CLANG_x86 :=
 ART_TARGET_CLANG_x86_64 :=
 
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 3b2d1cc..f543a19 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -201,6 +201,9 @@
 # $(5): library dependencies (host only)
 # $(6): extra include directories
 # $(7): multilib (default: empty), valid values: {,32,64,both})
+# $(8): host prefer 32-bit: {true, false} (default: false).  If argument
+#       `multilib` is explicitly set to 64, ignore the "host prefer 32-bit"
+#       setting and only build a 64-bit executable on host.
 define build-art-multi-executable
   $(foreach debug_flavor,ndebug debug,
     $(foreach target_flavor,host target,
@@ -211,6 +214,7 @@
       art-multi-lib-dependencies-host := $(5)
       art-multi-include-extra := $(6)
       art-multi-multilib := $(7)
+      art-multi-host-prefer-32-bit := $(8)
 
       # Add either -host or -target specific lib dependencies to the lib dependencies.
       art-multi-lib-dependencies += $$(art-multi-lib-dependencies-$(target_flavor))
@@ -223,6 +227,14 @@
       # Build the env guard var name, e.g. ART_BUILD_HOST_NDEBUG.
       art-multi-env-guard := $$(call art-string-to-uppercase,ART_BUILD_$(target_flavor)_$(debug_flavor))
 
+      ifeq ($(target_flavor),host)
+        ifeq ($$($$(art-multi-host-prefer-32-bit)),true)
+          ifneq ($$($$(art-multi-multilib)),64)
+            art-multi-multilib := 32
+          endif
+        endif
+      endif
+
       # Build the art executable only if the corresponding env guard was set.
       ifeq ($$($$(art-multi-env-guard)),true)
         $$(eval $$(call build-art-executable,$$(art-multi-binary-name),$$(art-multi-source-files),$$(art-multi-lib-dependencies),$$(art-multi-include-extra),$(target_flavor),$(debug_flavor),$$(art-multi-multilib)))
@@ -236,6 +248,7 @@
       art-multi-lib-dependencies-host :=
       art-multi-include-extra :=
       art-multi-multilib :=
+      art-multi-host-prefer-32-bit :=
       art-multi-env-guard :=
     )
   )
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index dcde5ab..291a69d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -248,6 +248,7 @@
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
   compiler/jni/jni_compiler_test.cc \
+  compiler/linker/output_stream_test.cc \
   compiler/oat_test.cc \
   compiler/optimizing/bounds_check_elimination_test.cc \
   compiler/optimizing/dominator_test.cc \
@@ -266,7 +267,6 @@
   compiler/optimizing/ssa_test.cc \
   compiler/optimizing/stack_map_test.cc \
   compiler/optimizing/suspend_check_test.cc \
-  compiler/output_stream_test.cc \
   compiler/utils/arena_allocator_test.cc \
   compiler/utils/dedupe_set_test.cc \
   compiler/utils/swap_space_test.cc \
@@ -382,7 +382,7 @@
 LOCAL_MODULE_TAGS := optional
 LOCAL_CPP_EXTENSION := cc
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl
 LOCAL_STATIC_LIBRARIES += libgtest
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
@@ -399,7 +399,7 @@
 LOCAL_CFLAGS := $(ART_HOST_CFLAGS)
 LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS)
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
-LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
+LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler
 LOCAL_STATIC_LIBRARIES := libgtest_host
 LOCAL_LDLIBS += -ldl -lpthread
@@ -542,7 +542,7 @@
   endif
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   LOCAL_SRC_FILES := $$(art_gtest_filename)
-  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
+  LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime art/cmdline $$(art_gtest_extra_c_includes)
   LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libartd-disassembler
   LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 2e9f208..4aced5b 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -196,6 +196,7 @@
         "  --boot-image=<file.art>: provide the image location for the boot class path.\n"
         "      Do not include the arch as part of the name, it is added automatically.\n"
         "      Example: --boot-image=/system/framework/boot.art\n"
+        "               (specifies /system/framework/<arch>/boot.art as the image file)\n"
         "\n";
     usage += StringPrintf(  // Optional.
         "  --instruction-set=(arm|arm64|mips|mips64|x86|x86_64): for locating the image\n"
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index c594adb..4751989 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -18,17 +18,17 @@
 
 #define CMDLINE_NDEBUG 1  // Do not output any debugging information for parsing.
 
-#include "cmdline/memory_representation.h"
-#include "cmdline/detail/cmdline_debug_detail.h"
+#include "memory_representation.h"
+#include "detail/cmdline_debug_detail.h"
 #include "cmdline_type_parser.h"
 
 // Includes for the types that are being specialized
 #include <string>
 #include "unit.h"
 #include "jdwp/jdwp.h"
-#include "runtime/base/logging.h"
-#include "runtime/base/time_utils.h"
-#include "runtime/experimental_flags.h"
+#include "base/logging.h"
+#include "base/time_utils.h"
+#include "experimental_flags.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "profiler_options.h"
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 3009b32..4b56804 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -25,10 +25,10 @@
 #include <numeric>
 #include <memory>
 
-#include "cmdline/cmdline_parse_result.h"
-#include "cmdline/token_range.h"
-#include "cmdline/unit.h"
-#include "cmdline/cmdline_types.h"
+#include "cmdline_parse_result.h"
+#include "cmdline_types.h"
+#include "token_range.h"
+#include "unit.h"
 
 namespace art {
   // Implementation details for the parser. Do not look inside if you hate templates.
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b05f479..348eabd 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -58,6 +58,10 @@
 	driver/compiler_driver.cc \
 	driver/compiler_options.cc \
 	driver/dex_compilation_unit.cc \
+	linker/buffered_output_stream.cc \
+	linker/file_output_stream.cc \
+	linker/output_stream.cc \
+	linker/vector_output_stream.cc \
 	linker/relative_patcher.cc \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
@@ -69,6 +73,7 @@
 	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
+	optimizing/dex_cache_array_fixups_arm.cc \
 	optimizing/graph_checker.cc \
 	optimizing/graph_visualizer.cc \
 	optimizing/gvn.cc \
@@ -81,6 +86,7 @@
 	optimizing/load_store_elimination.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
+	optimizing/nodes_arm64.cc \
 	optimizing/optimization.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
@@ -98,16 +104,12 @@
 	trampolines/trampoline_compiler.cc \
 	utils/assembler.cc \
 	utils/swap_space.cc \
-	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
-	file_output_stream.cc \
 	image_writer.cc \
-	oat_writer.cc \
-	output_stream.cc \
-	vector_output_stream.cc
+	oat_writer.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 7b0e5af..1b57b7d 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -128,6 +128,7 @@
 #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
   if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
     switch (GetInstructionSet()) {                                                        \
+      case kArm64:                                                                        \
       case kThumb2:                                                                       \
       case kX86:                                                                          \
       case kX86_64:                                                                       \
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 10841e6..0eb3e43 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -264,18 +264,16 @@
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) {
   DCHECK_EQ(class_loader.Get(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
-  ArtMethod* resolved_method = mUnit->GetClassLinker()->ResolveMethod(
-      *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
-  DCHECK_EQ(resolved_method == nullptr, soa.Self()->IsExceptionPending());
+  ArtMethod* resolved_method =
+      check_incompatible_class_change
+          ? mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kForceICCECheck>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type)
+          : mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+              *dex_cache->GetDexFile(), method_idx, dex_cache, class_loader, nullptr, invoke_type);
   if (UNLIKELY(resolved_method == nullptr)) {
+    DCHECK(soa.Self()->IsExceptionPending());
     // Clean up any exception left by type resolution.
     soa.Self()->ClearException();
-    return nullptr;
-  }
-  if (check_incompatible_class_change &&
-      UNLIKELY(resolved_method->CheckIncompatibleClassChange(invoke_type))) {
-    // Silently return null on incompatible class change.
-    return nullptr;
   }
   return resolved_method;
 }
@@ -361,7 +359,7 @@
     ArtMethod* called_method;
     ClassLinker* class_linker = mUnit->GetClassLinker();
     if (LIKELY(devirt_target->dex_file == mUnit->GetDexFile())) {
-      called_method = class_linker->ResolveMethod(
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, dex_cache, class_loader,
           nullptr, kVirtual);
     } else {
@@ -369,7 +367,7 @@
       auto target_dex_cache(hs.NewHandle(class_linker->RegisterDexFile(
           *devirt_target->dex_file,
           class_linker->GetOrCreateAllocatorForClassLoader(class_loader.Get()))));
-      called_method = class_linker->ResolveMethod(
+      called_method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *devirt_target->dex_file, devirt_target->dex_method_index, target_dex_cache,
           class_loader, nullptr, kVirtual);
     }
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d67087e..a05105b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_options.h"
-#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -77,9 +76,6 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
-// Whether to produce 64-bit ELF files for 64-bit targets.
-static constexpr bool kProduce64BitELFFiles = true;
-
 // Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when
 // given, too all compilations.
 static constexpr bool kRestrictCompilationFiltersToImage = true;
@@ -1906,7 +1902,7 @@
       }
       if (resolve_fields_and_methods) {
         while (it.HasNextDirectMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -1915,7 +1911,7 @@
           it.Next();
         }
         while (it.HasNextVirtualMethod()) {
-          ArtMethod* method = class_linker->ResolveMethod(
+          ArtMethod* method = class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
               dex_file, it.GetMemberIndex(), dex_cache, class_loader, nullptr,
               it.GetMethodInvokeType(class_def));
           if (method == nullptr) {
@@ -2514,19 +2510,6 @@
   return freezing_constructor_classes_.count(ClassReference(dex_file, class_def_index)) != 0;
 }
 
-bool CompilerDriver::WriteElf(const std::string& android_root,
-                              bool is_host,
-                              const std::vector<const art::DexFile*>& dex_files,
-                              OatWriter* oat_writer,
-                              art::File* file)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
-    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  } else {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
-  }
-}
-
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_present_) {
     return false;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d90d610..1347b37 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -59,7 +59,6 @@
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
-class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -398,12 +397,6 @@
     support_boot_image_fixup_ = support_boot_image_fixup;
   }
 
-  bool WriteElf(const std::string& android_root,
-                bool is_host,
-                const std::vector<const DexFile*>& dex_files,
-                OatWriter* oat_writer,
-                File* file);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index aa31036..a551e4b 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -112,6 +112,12 @@
     this->PushData(ptr, num_bytes);
   }
 
+  void WriteExprLoc(Attribute attrib, const void* ptr, size_t num_bytes) {
+    AddAbbrevAttribute(attrib, DW_FORM_exprloc);
+    this->PushUleb128(dchecked_integral_cast<uint32_t>(num_bytes));
+    this->PushData(ptr, num_bytes);
+  }
+
   void WriteData1(Attribute attrib, uint8_t value) {
     AddAbbrevAttribute(attrib, DW_FORM_data1);
     this->PushUint8(value);
@@ -132,6 +138,11 @@
     this->PushUint64(value);
   }
 
+  void WriteSecOffset(Attribute attrib, uint32_t offset) {
+    AddAbbrevAttribute(attrib, DW_FORM_sec_offset);
+    this->PushUint32(offset);
+  }
+
   void WriteSdata(Attribute attrib, int value) {
     AddAbbrevAttribute(attrib, DW_FORM_sdata);
     this->PushSleb128(value);
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index 5464ed9..c3a3ca9 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -29,6 +29,7 @@
 #include "common_runtime_test.h"
 #include "elf_builder.h"
 #include "gtest/gtest.h"
+#include "linker/file_output_stream.h"
 #include "os.h"
 
 namespace art {
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index c75aeac..f76f76f 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -134,7 +134,7 @@
   Writer<> writer(debug_info);
   size_t start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
-  writer.PushUint16(3);  // Version.
+  writer.PushUint16(4);  // Version.
   writer.PushUint32(debug_abbrev_offset);
   writer.PushUint8(entries.Is64bit() ? 8 : 4);
   size_t entries_offset = writer.data()->size();
@@ -167,10 +167,7 @@
   Writer<> writer(debug_line);
   size_t header_start = writer.data()->size();
   writer.PushUint32(0);  // Section-length placeholder.
-  // Claim DWARF-2 version even though we use some DWARF-3 features.
-  // DWARF-2 consumers will ignore the unknown opcodes.
-  // This is what clang currently does.
-  writer.PushUint16(2);  // .debug_line version.
+  writer.PushUint16(3);  // .debug_line version.
   size_t header_length_pos = writer.data()->size();
   writer.PushUint32(0);  // Header-length placeholder.
   writer.PushUint8(1 << opcodes.GetCodeFactorBits());
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
new file mode 100644
index 0000000..a391e4d
--- /dev/null
+++ b/compiler/dwarf/method_debug_info.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+#define ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
+
+#include "dex_file.h"
+
+namespace art {
+class CompiledMethod;
+namespace dwarf {
+
+struct MethodDebugInfo {
+  const DexFile* dex_file_;
+  size_t class_def_index_;
+  uint32_t dex_method_index_;
+  uint32_t access_flags_;
+  const DexFile::CodeItem* code_item_;
+  bool deduped_;
+  uint32_t low_pc_;
+  uint32_t high_pc_;
+  CompiledMethod* compiled_method_;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_METHOD_DEBUG_INFO_H_
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index 7045237..b67e8dd 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -35,9 +35,10 @@
   //   Arm64 mapping is correct since we already do this there.
   //   libunwind might struggle with the new mapping as well.
 
-  static Reg ArmCore(int num) { return Reg(num); }
+  static Reg ArmCore(int num) { return Reg(num); }  // R0-R15.
   static Reg ArmFp(int num) { return Reg(64 + num); }  // S0–S31.
-  static Reg Arm64Core(int num) { return Reg(num); }
+  static Reg ArmDp(int num) { return Reg(256 + num); }  // D0–D31.
+  static Reg Arm64Core(int num) { return Reg(num); }  // X0-X31.
   static Reg Arm64Fp(int num) { return Reg(64 + num); }  // V0-V31.
   static Reg MipsCore(int num) { return Reg(num); }
   static Reg Mips64Core(int num) { return Reg(num); }
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 6e8dfd6..bb07cc2 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -23,10 +23,10 @@
 #include "base/bit_utils.h"
 #include "base/casts.h"
 #include "base/unix_file/fd_file.h"
-#include "buffered_output_stream.h"
 #include "elf_utils.h"
-#include "file_output_stream.h"
 #include "leb128.h"
+#include "linker/error_delaying_output_stream.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
@@ -100,7 +100,7 @@
       header_.sh_entsize = entsize;
     }
 
-    virtual ~Section() {
+    ~Section() OVERRIDE {
       if (started_) {
         CHECK(finished_);
       }
@@ -120,8 +120,8 @@
       sections.push_back(this);
       // Align file position.
       if (header_.sh_type != SHT_NOBITS) {
-        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
-        owner_->Seek(header_.sh_offset, kSeekSet);
+        header_.sh_offset = RoundUp(owner_->stream_.Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->stream_.Seek(header_.sh_offset, kSeekSet);
       }
       // Align virtual memory address.
       if ((header_.sh_flags & SHF_ALLOC) != 0) {
@@ -139,7 +139,7 @@
         CHECK_GT(header_.sh_size, 0u);
       } else {
         // Use the current file position to determine section size.
-        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent);
         CHECK_GE(file_offset, (off_t)header_.sh_offset);
         header_.sh_size = file_offset - header_.sh_offset;
       }
@@ -161,7 +161,7 @@
       } else {
         CHECK(started_);
         CHECK_NE(header_.sh_type, (Elf_Word)SHT_NOBITS);
-        return owner_->Seek(0, kSeekCurrent) - header_.sh_offset;
+        return owner_->stream_.Seek(0, kSeekCurrent) - header_.sh_offset;
       }
     }
 
@@ -176,15 +176,20 @@
     bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
       CHECK(started_);
       CHECK(!finished_);
-      owner_->WriteFully(buffer, byte_count);
-      return true;
+      return owner_->stream_.WriteFully(buffer, byte_count);
     }
 
     // This function always succeeds to simplify code.
     // Use builder's Good() to check the actual status.
     off_t Seek(off_t offset, Whence whence) OVERRIDE {
       // Forward the seek as-is and trust the caller to use it reasonably.
-      return owner_->Seek(offset, whence);
+      return owner_->stream_.Seek(offset, whence);
+    }
+
+    // This function flushes the output and returns whether it succeeded.
+    // If there was a previous failure, this does nothing and returns false, i.e. failed.
+    bool Flush() OVERRIDE {
+      return owner_->stream_.Flush();
     }
 
     Elf_Word GetSectionIndex() const {
@@ -270,26 +275,24 @@
   };
 
   ElfBuilder(InstructionSet isa, OutputStream* output)
-    : isa_(isa),
-      output_(output),
-      output_good_(true),
-      output_offset_(0),
-      rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
-      bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
-      dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-      hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
-      dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
-      eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-      eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
-      strtab_(this, ".strtab", 0, kPageSize),
-      symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
-      debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
-      debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
-      shstrtab_(this, ".shstrtab", 0, 1),
-      virtual_address_(0) {
+      : isa_(isa),
+        stream_(output),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, kPageSize),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        virtual_address_(0) {
     text_.phdr_flags_ = PF_R | PF_X;
     bss_.phdr_flags_ = PF_R | PF_W;
     dynamic_.phdr_flags_ = PF_R | PF_W;
@@ -312,7 +315,7 @@
 
   // Encode patch locations as LEB128 list of deltas between consecutive addresses.
   // (exposed publicly for tests)
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+  static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations,
                                std::vector<uint8_t>* buffer) {
     buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
     uintptr_t address = 0;  // relative to start of section.
@@ -323,9 +326,9 @@
     }
   }
 
-  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+  void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) {
     std::vector<uint8_t> buffer;
-    EncodeOatPatches(*patch_locations, &buffer);
+    EncodeOatPatches(patch_locations, &buffer);
     std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
     s->Start();
     s->WriteFully(buffer.data(), buffer.size());
@@ -346,7 +349,7 @@
     // We do not know the number of headers until later, so
     // it is easiest to just reserve a fixed amount of space.
     int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Seek(size, kSeekSet);
+    stream_.Seek(size, kSeekSet);
     virtual_address_ += size;
   }
 
@@ -370,9 +373,14 @@
       shdrs.push_back(section->header_);
     }
     Elf_Off section_headers_offset;
-    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
-    Seek(section_headers_offset, kSeekSet);
-    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+    section_headers_offset = RoundUp(stream_.Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    stream_.Seek(section_headers_offset, kSeekSet);
+    stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
+
+    // Flush everything else before writing the program headers. This should prevent
+    // the OS from reordering writes, so that we don't end up with valid headers
+    // and partially written data if we suddenly lose power, for example.
+    stream_.Flush();
 
     // Write the initial file headers.
     std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
@@ -382,9 +390,10 @@
     elf_header.e_phnum = phdrs.size();
     elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
-    Seek(0, kSeekSet);
-    WriteFully(&elf_header, sizeof(elf_header));
-    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Seek(0, kSeekSet);
+    stream_.WriteFully(&elf_header, sizeof(elf_header));
+    stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+    stream_.Flush();
   }
 
   // The running program does not have access to section headers
@@ -462,53 +471,15 @@
 
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
-    return output_good_;
+    return stream_.Good();
+  }
+
+  // Returns the builder's internal stream.
+  OutputStream* GetStream() {
+    return &stream_;
   }
 
  private:
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  void WriteFully(const void* buffer, size_t byte_count) {
-    if (output_good_) {
-      if (!output_->WriteFully(buffer, byte_count)) {
-        PLOG(ERROR) << "Failed to write " << byte_count
-                    << " bytes to ELF file at offset " << output_offset_;
-        output_good_ = false;
-      }
-    }
-    output_offset_ += byte_count;
-  }
-
-  // This function always succeeds to simplify code.
-  // Use Good() to check the actual status of the output stream.
-  off_t Seek(off_t offset, Whence whence) {
-    // We keep shadow copy of the offset so that we return
-    // the expected value even if the output stream failed.
-    off_t new_offset;
-    switch (whence) {
-      case kSeekSet:
-        new_offset = offset;
-        break;
-      case kSeekCurrent:
-        new_offset = output_offset_ + offset;
-        break;
-      default:
-        LOG(FATAL) << "Unsupported seek type: " << whence;
-        UNREACHABLE();
-    }
-    if (output_good_) {
-      off_t actual_offset = output_->Seek(offset, whence);
-      if (actual_offset == (off_t)-1) {
-        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
-                    << " whence=" << whence << " new_offset=" << new_offset;
-        output_good_ = false;
-      }
-      DCHECK_EQ(actual_offset, new_offset);
-    }
-    output_offset_ = new_offset;
-    return new_offset;
-  }
-
   static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
     Elf_Ehdr elf_header = Elf_Ehdr();
     switch (isa) {
@@ -660,9 +631,7 @@
 
   InstructionSet isa_;
 
-  OutputStream* output_;
-  bool output_good_;  // True if all writes to output succeeded.
-  off_t output_offset_;  // Keep track of the current position in the stream.
+  ErrorDelayingOutputStream stream_;
 
   Section rodata_;
   Section text_;
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03f8ceb..c5a0fd5 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -25,13 +25,16 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "os.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class CompilerDriver;
-class DexFile;
 class ElfFile;
-class OatWriter;
+class OutputStream;
+
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
 
 class ElfWriter {
  public:
@@ -46,21 +49,26 @@
 
   static bool Fixup(File* file, uintptr_t oat_data_begin);
 
- protected:
-  ElfWriter(const CompilerDriver& driver, File* elf_file)
-    : compiler_driver_(&driver), elf_file_(elf_file) {
-  }
-
   virtual ~ElfWriter() {}
 
-  virtual bool Write(OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host)
-      SHARED_REQUIRES(Locks::mutator_lock_) = 0;
+  virtual void Start() = 0;
+  virtual OutputStream* StartRoData() = 0;
+  virtual void EndRoData(OutputStream* rodata) = 0;
+  virtual OutputStream* StartText() = 0;
+  virtual void EndText(OutputStream* text) = 0;
+  virtual void SetBssSize(size_t bss_size) = 0;
+  virtual void WriteDynamicSection() = 0;
+  virtual void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) = 0;
+  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
+  virtual bool End() = 0;
 
-  const CompilerDriver* const compiler_driver_;
-  File* const elf_file_;
+  // Get the ELF writer's stream. This stream can be used for writing data directly
+  // to a section after the section has been finished. When that's done, the user
+  // should Seek() back to the position where the stream was before this operation.
+  virtual OutputStream* GetStream() = 0;
+
+ protected:
+  ElfWriter() = default;
 };
 
 }  // namespace art
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 5e2a8bf..7696b94 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -17,6 +17,7 @@
 #include "elf_writer_debug.h"
 
 #include <unordered_set>
+#include <vector>
 
 #include "base/casts.h"
 #include "base/stl_util.h"
@@ -25,14 +26,54 @@
 #include "dex_file-inl.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
+#include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
 #include "oat_writer.h"
 #include "utils.h"
+#include "stack_map.h"
 
 namespace art {
 namespace dwarf {
 
+static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmCore(machine_reg);
+    case kArm64:
+      return Reg::Arm64Core(machine_reg);
+    case kX86:
+      return Reg::X86Core(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Core(machine_reg);
+    case kMips:
+      return Reg::MipsCore(machine_reg);
+    case kMips64:
+      return Reg::Mips64Core(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
+static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return Reg::ArmFp(machine_reg);
+    case kArm64:
+      return Reg::Arm64Fp(machine_reg);
+    case kX86:
+      return Reg::X86Fp(machine_reg);
+    case kX86_64:
+      return Reg::X86_64Fp(machine_reg);
+    default:
+      LOG(FATAL) << "Unknown instruction set: " << isa;
+      UNREACHABLE();
+  }
+}
+
 static void WriteCIE(InstructionSet isa,
                      CFIFormat format,
                      std::vector<uint8_t>* buffer) {
@@ -164,7 +205,7 @@
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
   CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
         format == dwarf::DW_EH_FRAME_FORMAT);
@@ -194,7 +235,7 @@
     cfi_section->WriteFully(buffer.data(), buffer.size());
     buffer_address += buffer.size();
     buffer.clear();
-    for (const OatWriter::DebugInfo& mi : method_infos) {
+    for (const MethodDebugInfo& mi : method_infos) {
       if (!mi.deduped_) {  // Only one FDE per unique address.
         ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
         if (!opcodes.empty()) {
@@ -247,16 +288,135 @@
     header_section->WriteFully(binary_search_table.data(), binary_search_table.size());
     header_section->End();
   } else {
-    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
+    builder->WritePatches(".debug_frame.oat_patches",
+                          ArrayRef<const uintptr_t>(patch_locations));
   }
 }
 
-struct CompilationUnit {
-  std::vector<const OatWriter::DebugInfo*> methods_;
-  size_t debug_line_offset_ = 0;
-  uint32_t low_pc_ = 0xFFFFFFFFU;
-  uint32_t high_pc_ = 0;
-};
+namespace {
+  struct CompilationUnit {
+    std::vector<const MethodDebugInfo*> methods_;
+    size_t debug_line_offset_ = 0;
+    uint32_t low_pc_ = 0xFFFFFFFFU;
+    uint32_t high_pc_ = 0;
+  };
+
+  struct LocalVariable {
+    uint16_t vreg;
+    uint32_t dex_pc_low;
+    uint32_t dex_pc_high;
+    const char* name;
+    const char* type;
+    const char* sig;
+  };
+
+  struct DebugInfoCallback {
+    static void NewLocal(void* ctx,
+                         uint16_t vreg,
+                         uint32_t start,
+                         uint32_t end,
+                         const char* name,
+                         const char* type,
+                         const char* sig) {
+      auto* context = static_cast<DebugInfoCallback*>(ctx);
+      if (name != nullptr && type != nullptr) {
+        context->local_variables_.push_back({vreg, start, end, name, type, sig});
+      }
+    }
+    std::vector<LocalVariable> local_variables_;
+  };
+
+  std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) {
+    std::vector<const char*> names;
+    const uint8_t* stream = mi->dex_file_->GetDebugInfoStream(mi->code_item_);
+    if (stream != nullptr) {
+      DecodeUnsignedLeb128(&stream);  // line.
+      uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
+      for (uint32_t i = 0; i < parameters_size; ++i) {
+        uint32_t id = DecodeUnsignedLeb128P1(&stream);
+        names.push_back(mi->dex_file_->StringDataByIdx(id));
+      }
+    }
+    return names;
+  }
+
+  struct VariableLocation {
+    uint32_t low_pc;
+    uint32_t high_pc;
+    DexRegisterLocation reg_lo;  // May be None if the location is unknown.
+    DexRegisterLocation reg_hi;  // Most significant bits of 64-bit value.
+  };
+
+  // Get the location of given dex register (e.g. stack or machine register).
+  // Note that the location might be different based on the current pc.
+  // The result will cover all ranges where the variable is in scope.
+  std::vector<VariableLocation> GetVariableLocations(const MethodDebugInfo* method_info,
+                                                     uint16_t vreg,
+                                                     bool is64bitValue,
+                                                     uint32_t dex_pc_low,
+                                                     uint32_t dex_pc_high) {
+    std::vector<VariableLocation> variable_locations;
+
+    // Get stack maps sorted by pc (they might not be sorted internally).
+    const CodeInfo code_info(method_info->compiled_method_->GetVmapTable().data());
+    const StackMapEncoding encoding = code_info.ExtractEncoding();
+    std::map<uint32_t, StackMap> stack_maps;
+    for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) {
+      StackMap stack_map = code_info.GetStackMapAt(s, encoding);
+      DCHECK(stack_map.IsValid());
+      const uint32_t low_pc = method_info->low_pc_ + stack_map.GetNativePcOffset(encoding);
+      DCHECK_LE(low_pc, method_info->high_pc_);
+      stack_maps.emplace(low_pc, stack_map);
+    }
+
+    // Create entries for the requested register based on stack map data.
+    for (auto it = stack_maps.begin(); it != stack_maps.end(); it++) {
+      const StackMap& stack_map = it->second;
+      const uint32_t low_pc = it->first;
+      auto next_it = it;
+      next_it++;
+      const uint32_t high_pc = next_it != stack_maps.end() ? next_it->first
+                                                           : method_info->high_pc_;
+      DCHECK_LE(low_pc, high_pc);
+      if (low_pc == high_pc) {
+        continue;  // Ignore if the address range is empty.
+      }
+
+      // Check that the stack map is in the requested range.
+      uint32_t dex_pc = stack_map.GetDexPc(encoding);
+      if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) {
+        continue;
+      }
+
+      // Find the location of the dex register.
+      DexRegisterLocation reg_lo = DexRegisterLocation::None();
+      DexRegisterLocation reg_hi = DexRegisterLocation::None();
+      if (stack_map.HasDexRegisterMap(encoding)) {
+        DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(
+            stack_map, encoding, method_info->code_item_->registers_size_);
+        reg_lo = dex_register_map.GetDexRegisterLocation(
+            vreg, method_info->code_item_->registers_size_, code_info, encoding);
+        if (is64bitValue) {
+          reg_hi = dex_register_map.GetDexRegisterLocation(
+              vreg + 1, method_info->code_item_->registers_size_, code_info, encoding);
+        }
+      }
+
+      // Add location entry for this address range.
+      if (!variable_locations.empty() &&
+          variable_locations.back().reg_lo == reg_lo &&
+          variable_locations.back().reg_hi == reg_hi &&
+          variable_locations.back().high_pc == low_pc) {
+        // Merge with the previous entry (extend its range).
+        variable_locations.back().high_pc = high_pc;
+      } else {
+        variable_locations.push_back({low_pc, high_pc, reg_lo, reg_hi});
+      }
+    }
+
+    return variable_locations;
+  }
+}  // namespace
 
 // Helper class to write .debug_info and its supporting sections.
 template<typename ElfTypes>
@@ -280,8 +440,8 @@
       info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
       info_.WriteData1(DW_AT_language, DW_LANG_Java);
       info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-      info_.WriteAddr(DW_AT_high_pc, text_address + compilation_unit.high_pc_);
-      info_.WriteData4(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
+      info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+      info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
       const char* last_dex_class_desc = nullptr;
       for (auto mi : compilation_unit.methods_) {
@@ -290,6 +450,7 @@
         const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method);
         const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto);
         const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method);
+        const bool is_static = (mi->access_flags_ & kAccStatic) != 0;
 
         // Enclose the method in correct class definition.
         if (last_dex_class_desc != dex_class_desc) {
@@ -304,25 +465,39 @@
           last_dex_class_desc = dex_class_desc;
         }
 
+        // Collect information about local variables and parameters.
+        DebugInfoCallback debug_info_callback;
         std::vector<const char*> param_names;
         if (mi->code_item_ != nullptr) {
-          const uint8_t* stream = dex->GetDebugInfoStream(mi->code_item_);
-          if (stream != nullptr) {
-            DecodeUnsignedLeb128(&stream);  // line.
-            uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
-            for (uint32_t i = 0; i < parameters_size; ++i) {
-              uint32_t id = DecodeUnsignedLeb128P1(&stream);
-              param_names.push_back(mi->dex_file_->StringDataByIdx(id));
-            }
-          }
+          dex->DecodeDebugInfo(mi->code_item_,
+                               is_static,
+                               mi->dex_method_index_,
+                               nullptr,
+                               DebugInfoCallback::NewLocal,
+                               &debug_info_callback);
+          param_names = GetParamNames(mi);
         }
 
         int start_depth = info_.Depth();
         info_.StartTag(DW_TAG_subprogram);
         WriteName(dex->GetMethodName(dex_method));
         info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-        info_.WriteAddr(DW_AT_high_pc, text_address + mi->high_pc_);
+        info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+        uint8_t frame_base[] = { DW_OP_call_frame_cfa };
+        info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
         WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
+        uint32_t vreg = mi->code_item_ == nullptr ? 0 :
+            mi->code_item_->registers_size_ - mi->code_item_->ins_size_;
+        if (!is_static) {
+          info_.StartTag(DW_TAG_formal_parameter);
+          WriteName("this");
+          info_.WriteFlag(DW_AT_artificial, true);
+          WriteLazyType(dex_class_desc);
+          const bool is64bitValue = false;
+          WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+          vreg++;
+          info_.EndTag();
+        }
         if (dex_params != nullptr) {
           for (uint32_t i = 0; i < dex_params->Size(); ++i) {
             info_.StartTag(DW_TAG_formal_parameter);
@@ -330,7 +505,28 @@
             if (i < param_names.size() && param_names[i] != nullptr) {
               WriteName(param_names[i]);
             }
-            WriteLazyType(dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_));
+            // Write the type.
+            const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_);
+            WriteLazyType(type_desc);
+            // Write the stack location of the parameter.
+            const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J';
+            WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_);
+            vreg += is64bitValue ? 2 : 1;
+            info_.EndTag();
+          }
+          if (mi->code_item_ != nullptr) {
+            CHECK_EQ(vreg, mi->code_item_->registers_size_);
+          }
+        }
+        for (const LocalVariable& var : debug_info_callback.local_variables_) {
+          const uint32_t first_arg = mi->code_item_->registers_size_ - mi->code_item_->ins_size_;
+          if (var.vreg < first_arg) {
+            info_.StartTag(DW_TAG_variable);
+            WriteName(var.name);
+            WriteLazyType(var.type);
+            bool is64bitValue = var.type[0] == 'D' || var.type[0] == 'J';
+            WriteRegLocation(mi, var.vreg, is64bitValue, compilation_unit.low_pc_,
+                             var.dex_pc_low, var.dex_pc_high);
             info_.EndTag();
           }
         }
@@ -352,6 +548,158 @@
       owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
     }
 
+    // Write table into .debug_loc which describes location of dex register.
+    // The dex register might be valid only at some points and it might
+    // move between machine registers and stack.
+    void WriteRegLocation(const MethodDebugInfo* method_info,
+                          uint16_t vreg,
+                          bool is64bitValue,
+                          uint32_t compilation_unit_low_pc,
+                          uint32_t dex_pc_low = 0,
+                          uint32_t dex_pc_high = 0xFFFFFFFF) {
+      using Kind = DexRegisterLocation::Kind;
+      bool is_optimizing = method_info->compiled_method_->GetQuickCode().size() > 0 &&
+                           method_info->compiled_method_->GetVmapTable().size() > 0 &&
+                           method_info->compiled_method_->GetGcMap().size() == 0 &&
+                           method_info->code_item_ != nullptr;
+      if (!is_optimizing) {
+        return;
+      }
+
+      Writer<> debug_loc(&owner_->debug_loc_);
+      Writer<> debug_ranges(&owner_->debug_ranges_);
+      info_.WriteSecOffset(DW_AT_location, debug_loc.size());
+      info_.WriteSecOffset(DW_AT_start_scope, debug_ranges.size());
+
+      std::vector<VariableLocation> variable_locations = GetVariableLocations(
+          method_info,
+          vreg,
+          is64bitValue,
+          dex_pc_low,
+          dex_pc_high);
+
+      // Write .debug_loc entries.
+      const InstructionSet isa = owner_->builder_->GetIsa();
+      const bool is64bit = Is64BitInstructionSet(isa);
+      for (const VariableLocation& variable_location : variable_locations) {
+        // Translate dex register location to DWARF expression.
+        // Note that 64-bit value might be split to two distinct locations.
+        // (for example, two 32-bit machine registers, or even stack and register)
+        uint8_t buffer[64];
+        uint8_t* pos = buffer;
+        DexRegisterLocation reg_lo = variable_location.reg_lo;
+        DexRegisterLocation reg_hi = variable_location.reg_hi;
+        for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) {
+          DexRegisterLocation reg_loc = (piece == 0 ? reg_lo : reg_hi);
+          const Kind kind = reg_loc.GetKind();
+          const int32_t value = reg_loc.GetValue();
+          if (kind == Kind::kInStack) {
+            const size_t frame_size = method_info->compiled_method_->GetFrameSizeInBytes();
+            *(pos++) = DW_OP_fbreg;
+            // The stack offset is relative to SP. Make it relative to CFA.
+            pos = EncodeSignedLeb128(pos, value - frame_size);
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInStack &&
+                reg_hi.GetValue() == value + 4) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInRegister) {
+            pos = WriteOpReg(pos, GetDwarfCoreReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh &&
+                reg_hi.GetValue() == value) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kInFpuRegister) {
+            if ((isa == kArm || isa == kThumb2) &&
+                piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister &&
+                reg_hi.GetValue() == value + 1 && value % 2 == 0) {
+              // Translate S register pair to D register (e.g. S4+S5 to D2).
+              pos = WriteOpReg(pos, Reg::ArmDp(value / 2).num());
+              break;
+            }
+            if (isa == kMips || isa == kMips64) {
+              // TODO: Find what the DWARF floating point register numbers are on MIPS.
+              break;
+            }
+            pos = WriteOpReg(pos, GetDwarfFpReg(isa, value).num());
+            if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh &&
+                reg_hi.GetValue() == reg_lo.GetValue()) {
+              break;  // the high word is correctly implied by the low word.
+            }
+          } else if (kind == Kind::kConstant) {
+            *(pos++) = DW_OP_consts;
+            pos = EncodeSignedLeb128(pos, value);
+            *(pos++) = DW_OP_stack_value;
+          } else if (kind == Kind::kNone) {
+            break;
+          } else {
+            // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind().
+            // kInRegisterHigh and kInFpuRegisterHigh should be handled by
+            // the special cases above and they should not occur alone.
+            LOG(ERROR) << "Unexpected register location kind: "
+                       << DexRegisterLocation::PrettyDescriptor(kind);
+            break;
+          }
+          if (is64bitValue) {
+            // Write the marker which is needed by split 64-bit values.
+            // This code is skipped by the special cases.
+            *(pos++) = DW_OP_piece;
+            pos = EncodeUnsignedLeb128(pos, 4);
+          }
+        }
+
+        // Check that the buffer is large enough; keep half of it empty for safety.
+        DCHECK_LE(static_cast<size_t>(pos - buffer), sizeof(buffer) / 2);
+        if (pos > buffer) {
+          if (is64bit) {
+            debug_loc.PushUint64(variable_location.low_pc - compilation_unit_low_pc);
+            debug_loc.PushUint64(variable_location.high_pc - compilation_unit_low_pc);
+          } else {
+            debug_loc.PushUint32(variable_location.low_pc - compilation_unit_low_pc);
+            debug_loc.PushUint32(variable_location.high_pc - compilation_unit_low_pc);
+          }
+          // Write the expression.
+          debug_loc.PushUint16(pos - buffer);
+          debug_loc.PushData(buffer, pos - buffer);
+        } else {
+          // Do not generate .debug_loc if the location is not known.
+        }
+      }
+      // Write end-of-list entry.
+      if (is64bit) {
+        debug_loc.PushUint64(0);
+        debug_loc.PushUint64(0);
+      } else {
+        debug_loc.PushUint32(0);
+        debug_loc.PushUint32(0);
+      }
+
+      // Write .debug_ranges entries.
+      // This includes ranges where the variable is in scope but the location is not known.
+      for (size_t i = 0; i < variable_locations.size(); i++) {
+        uint32_t low_pc = variable_locations[i].low_pc;
+        uint32_t high_pc = variable_locations[i].high_pc;
+        while (i + 1 < variable_locations.size() && variable_locations[i+1].low_pc == high_pc) {
+          // Merge address range with the next entry.
+          high_pc = variable_locations[++i].high_pc;
+        }
+        if (is64bit) {
+          debug_ranges.PushUint64(low_pc - compilation_unit_low_pc);
+          debug_ranges.PushUint64(high_pc - compilation_unit_low_pc);
+        } else {
+          debug_ranges.PushUint32(low_pc - compilation_unit_low_pc);
+          debug_ranges.PushUint32(high_pc - compilation_unit_low_pc);
+        }
+      }
+      // Write end-of-list entry.
+      if (is64bit) {
+        debug_ranges.PushUint64(0);
+        debug_ranges.PushUint64(0);
+      } else {
+        debug_ranges.PushUint32(0);
+        debug_ranges.PushUint32(0);
+      }
+    }
+
     // Some types are difficult to define as we go since they need
     // to be enclosed in the right set of namespaces. Therefore we
     // just define all types lazily at the end of compilation unit.
@@ -375,6 +723,17 @@
       info_.WriteStrp(DW_AT_name, owner_->WriteString(name));
     }
 
+    // Helper which writes DWARF expression referencing a register.
+    static uint8_t* WriteOpReg(uint8_t* buffer, uint32_t dwarf_reg_num) {
+      if (dwarf_reg_num < 32) {
+        *(buffer++) = DW_OP_reg0 + dwarf_reg_num;
+      } else {
+        *(buffer++) = DW_OP_regx;
+        buffer = EncodeUnsignedLeb128(buffer, dwarf_reg_num);
+      }
+      return buffer;
+    }
+
     // Convert dex type descriptor to DWARF.
     // Returns offset in the compilation unit.
     size_t WriteType(const char* desc) {
@@ -398,22 +757,60 @@
       } else {
         // Primitive types.
         const char* name;
+        uint32_t encoding;
+        uint32_t byte_size;
         switch (*desc) {
-        case 'B': name = "byte"; break;
-        case 'C': name = "char"; break;
-        case 'D': name = "double"; break;
-        case 'F': name = "float"; break;
-        case 'I': name = "int"; break;
-        case 'J': name = "long"; break;
-        case 'S': name = "short"; break;
-        case 'Z': name = "boolean"; break;
-        case 'V': name = "void"; break;
+        case 'B':
+          name = "byte";
+          encoding = DW_ATE_signed;
+          byte_size = 1;
+          break;
+        case 'C':
+          name = "char";
+          encoding = DW_ATE_UTF;
+          byte_size = 2;
+          break;
+        case 'D':
+          name = "double";
+          encoding = DW_ATE_float;
+          byte_size = 8;
+          break;
+        case 'F':
+          name = "float";
+          encoding = DW_ATE_float;
+          byte_size = 4;
+          break;
+        case 'I':
+          name = "int";
+          encoding = DW_ATE_signed;
+          byte_size = 4;
+          break;
+        case 'J':
+          name = "long";
+          encoding = DW_ATE_signed;
+          byte_size = 8;
+          break;
+        case 'S':
+          name = "short";
+          encoding = DW_ATE_signed;
+          byte_size = 2;
+          break;
+        case 'Z':
+          name = "boolean";
+          encoding = DW_ATE_boolean;
+          byte_size = 1;
+          break;
+        case 'V':
+          LOG(FATAL) << "Void type should not be encoded";
+          UNREACHABLE();
         default:
           LOG(FATAL) << "Unknown dex type descriptor: " << desc;
           UNREACHABLE();
         }
         offset = info_.StartTag(DW_TAG_base_type);
         WriteName(name);
+        info_.WriteData1(DW_AT_encoding, encoding);
+        info_.WriteData1(DW_AT_byte_size, byte_size);
         info_.EndTag();
       }
 
@@ -477,9 +874,12 @@
 
   void End() {
     builder_->GetDebugInfo()->End();
-    builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_);
+    builder_->WritePatches(".debug_info.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_info_patches_));
     builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data());
     builder_->WriteSection(".debug_str", &debug_str_.Data());
+    builder_->WriteSection(".debug_loc", &debug_loc_);
+    builder_->WriteSection(".debug_ranges", &debug_ranges_);
   }
 
  private:
@@ -491,6 +891,8 @@
   std::vector<uintptr_t> debug_info_patches_;
   DedupVector debug_abbrev_;
   DedupVector debug_str_;
+  std::vector<uint8_t> debug_loc_;
+  std::vector<uint8_t> debug_ranges_;
 
   std::unordered_set<const char*> defined_dex_classes_;  // For CHECKs only.
 };
@@ -542,7 +944,7 @@
     if (dwarf_isa != -1) {
       opcodes.SetISA(dwarf_isa);
     }
-    for (const OatWriter::DebugInfo* mi : compilation_unit.methods_) {
+    for (const MethodDebugInfo* mi : compilation_unit.methods_) {
       // Ignore function if we have already generated line table for the same address.
       // It would confuse the debugger and the DWARF specification forbids it.
       if (mi->deduped_) {
@@ -551,7 +953,7 @@
 
       struct DebugInfoCallbacks {
         static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
-          auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
+          auto* context = static_cast<DebugInfoCallbacks*>(ctx);
           context->dex2line_.push_back({address, static_cast<int32_t>(line)});
           return false;
         }
@@ -659,7 +1061,8 @@
 
   void End() {
     builder_->GetDebugLine()->End();
-    builder_->WritePatches(".debug_line.oat_patches", &debug_line_patches);
+    builder_->WritePatches(".debug_line.oat_patches",
+                           ArrayRef<const uintptr_t>(debug_line_patches));
   }
 
  private:
@@ -669,11 +1072,11 @@
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos) {
+                        const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
-  for (const OatWriter::DebugInfo& mi : method_infos) {
+  for (const MethodDebugInfo& mi : method_infos) {
     auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_);
     const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def);
     if (compilation_units.empty() || source_file != last_source_file) {
@@ -710,18 +1113,18 @@
 // Explicit instantiations
 template void WriteCFISection<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteCFISection<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos,
+    const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat format);
 template void WriteDebugSections<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 template void WriteDebugSections<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
-    const std::vector<OatWriter::DebugInfo>& method_infos);
+    const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index e58fd0a..9ed102f 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,23 +17,22 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include <vector>
-
 #include "elf_builder.h"
 #include "dwarf/dwarf_constants.h"
 #include "oat_writer.h"
+#include "utils/array_ref.h"
 
 namespace art {
 namespace dwarf {
 
 template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const std::vector<OatWriter::DebugInfo>& method_infos,
+                     const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format);
 
 template<typename ElfTypes>
 void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const std::vector<OatWriter::DebugInfo>& method_infos);
+                        const ArrayRef<const MethodDebugInfo>& method_infos);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 5c059e1..e411496 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -21,19 +21,18 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/unix_file/fd_file.h"
+#include "base/stl_util.h"
 #include "compiled_method.h"
-#include "dex_file-inl.h"
-#include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf.h"
 #include "elf_builder.h"
-#include "elf_file.h"
 #include "elf_utils.h"
 #include "elf_writer_debug.h"
 #include "globals.h"
 #include "leb128.h"
-#include "oat.h"
-#include "oat_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "utils.h"
 
 namespace art {
@@ -57,125 +56,193 @@
 constexpr bool kGenerateSingleArmMappingSymbol = true;
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Create(File* elf_file,
-                                      OatWriter* oat_writer,
-                                      const std::vector<const DexFile*>& dex_files,
-                                      const std::string& android_root,
-                                      bool is_host,
-                                      const CompilerDriver& driver) {
-  ElfWriterQuick elf_writer(driver, elf_file);
-  return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
+class ElfWriterQuick FINAL : public ElfWriter {
+ public:
+  ElfWriterQuick(InstructionSet instruction_set,
+                 const CompilerOptions* compiler_options,
+                 File* elf_file);
+  ~ElfWriterQuick();
+
+  void Start() OVERRIDE;
+  OutputStream* StartRoData() OVERRIDE;
+  void EndRoData(OutputStream* rodata) OVERRIDE;
+  OutputStream* StartText() OVERRIDE;
+  void EndText(OutputStream* text) OVERRIDE;
+  void SetBssSize(size_t bss_size) OVERRIDE;
+  void WriteDynamicSection() OVERRIDE;
+  void WriteDebugInfo(const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) OVERRIDE;
+  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
+  bool End() OVERRIDE;
+
+  virtual OutputStream* GetStream() OVERRIDE;
+
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer);
+
+ private:
+  const CompilerOptions* const compiler_options_;
+  File* const elf_file_;
+  std::unique_ptr<BufferedOutputStream> output_stream_;
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder_;
+
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
+};
+
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file) {
+  if (Is64BitInstructionSet(instruction_set)) {
+    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set, compiler_options, elf_file);
+  } else {
+    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set, compiler_options, elf_file);
+  }
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer);
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
 
 template <typename ElfTypes>
-bool ElfWriterQuick<ElfTypes>::Write(
-    OatWriter* oat_writer,
-    const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED,
-    const std::string& android_root_unused ATTRIBUTE_UNUSED,
-    bool is_host_unused ATTRIBUTE_UNUSED) {
-  const InstructionSet isa = compiler_driver_->GetInstructionSet();
-  std::unique_ptr<BufferedOutputStream> output_stream(
-      new BufferedOutputStream(new FileOutputStream(elf_file_)));
-  std::unique_ptr<ElfBuilder<ElfTypes>> builder(
-      new ElfBuilder<ElfTypes>(isa, output_stream.get()));
+ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
+                                         const CompilerOptions* compiler_options,
+                                         File* elf_file)
+    : ElfWriter(),
+      compiler_options_(compiler_options),
+      elf_file_(elf_file),
+      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      builder_(new ElfBuilder<ElfTypes>(instruction_set, output_stream_.get())) {}
 
-  builder->Start();
+template <typename ElfTypes>
+ElfWriterQuick<ElfTypes>::~ElfWriterQuick() {}
 
-  auto* rodata = builder->GetRoData();
-  auto* text = builder->GetText();
-  auto* bss = builder->GetBss();
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::Start() {
+  builder_->Start();
+}
 
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartRoData() {
+  auto* rodata = builder_->GetRoData();
   rodata->Start();
-  if (!oat_writer->WriteRodata(rodata)) {
-    return false;
-  }
-  rodata->End();
+  return rodata;
+}
 
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndRoData(OutputStream* rodata) {
+  CHECK_EQ(builder_->GetRoData(), rodata);
+  builder_->GetRoData()->End();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::StartText() {
+  auto* text = builder_->GetText();
   text->Start();
-  if (!oat_writer->WriteCode(text)) {
-    return false;
-  }
-  text->End();
+  return text;
+}
 
-  if (oat_writer->GetBssSize() != 0) {
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::EndText(OutputStream* text) {
+  CHECK_EQ(builder_->GetText(), text);
+  builder_->GetText()->End();
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::SetBssSize(size_t bss_size) {
+  auto* bss = builder_->GetBss();
+  if (bss_size != 0u) {
     bss->Start();
-    bss->SetSize(oat_writer->GetBssSize());
+    bss->SetSize(bss_size);
     bss->End();
   }
-
-  builder->WriteDynamicSection(elf_file_->GetPath());
-
-  if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) {
-    const auto& method_infos = oat_writer->GetMethodDebugInfo();
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder.get(), oat_writer);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder.get(), method_infos);
-    }
-  }
-
-  // Add relocation section for .text.
-  if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
-  }
-
-  builder->End();
-
-  return builder->Good() && output_stream->Flush();
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
+void ElfWriterQuick<ElfTypes>::WriteDynamicSection() {
+  builder_->WriteDynamicSection(elf_file_->GetPath());
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
+    const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    if (!method_infos.empty()) {
+      // Add methods to .symtab.
+      WriteDebugSymbols(builder_.get(), method_infos);
+      // Generate CFI (stack unwinding information).
+      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
+      // Write DWARF .debug_* sections.
+      dwarf::WriteDebugSections(builder_.get(), method_infos);
+    }
+  }
+}
+
+template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::WritePatchLocations(
+    const ArrayRef<const uintptr_t>& patch_locations) {
+  // Add relocation section for .text.
+  if (compiler_options_->GetIncludePatchInformation()) {
+    // Note that ElfWriter::Fixup will be called regardless and therefore
+    // we need to include oat_patches for debug sections unconditionally.
+    builder_->WritePatches(".text.oat_patches", patch_locations);
+  }
+}
+
+template <typename ElfTypes>
+bool ElfWriterQuick<ElfTypes>::End() {
+  builder_->End();
+
+  return builder_->Good();
+}
+
+template <typename ElfTypes>
+OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
+  return builder_->GetStream();
+}
+
+template <typename ElfTypes>
+static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   bool generated_mapping_symbol = false;
   auto* strtab = builder->GetStrTab();
   auto* symtab = builder->GetSymTab();
 
-  if (method_info.empty()) {
+  if (method_infos.empty()) {
     return;
   }
 
   // Find all addresses (low_pc) which contain deduped methods.
   // The first instance of method is not marked deduped_, but the rest is.
   std::unordered_set<uint32_t> deduped_addresses;
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
-      deduped_addresses.insert(it->low_pc_);
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
     }
   }
 
   strtab->Start();
   strtab->Write("");  // strtab should start with empty string.
-  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
-    if (it->deduped_) {
+  for (const dwarf::MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
       continue;  // Add symbol only for the first instance.
     }
-    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
-    if (deduped_addresses.find(it->low_pc_) != deduped_addresses.end()) {
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
       name += " [DEDUPED]";
     }
 
-    uint32_t low_pc = it->low_pc_;
+    uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += it->compiled_method_->CodeDelta();
+    low_pc += info.compiled_method_->CodeDelta();
     symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // Note that even if we generate just a single mapping symbol, ARM's Streamline
     // requires it to match function symbol.  Just address 0 does not work.
-    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
+        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
                     true, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index 83781ab..347d372 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -17,46 +17,19 @@
 #ifndef ART_COMPILER_ELF_WRITER_QUICK_H_
 #define ART_COMPILER_ELF_WRITER_QUICK_H_
 
-#include "elf_utils.h"
+#include <memory>
+
+#include "arch/instruction_set.h"
 #include "elf_writer.h"
-#include "oat_writer.h"
+#include "os.h"
 
 namespace art {
 
-template <typename ElfTypes>
-class ElfWriterQuick FINAL : public ElfWriter {
- public:
-  // Write an ELF file. Returns true on success, false on failure.
-  static bool Create(File* file,
-                     OatWriter* oat_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     const std::string& android_root,
-                     bool is_host,
-                     const CompilerDriver& driver)
-      SHARED_REQUIRES(Locks::mutator_lock_);
+class CompilerOptions;
 
-  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
-                               std::vector<uint8_t>* buffer);
-
- protected:
-  bool Write(OatWriter* oat_writer,
-             const std::vector<const DexFile*>& dex_files,
-             const std::string& android_root,
-             bool is_host)
-      OVERRIDE
-      SHARED_REQUIRES(Locks::mutator_lock_);
-
- private:
-  ElfWriterQuick(const CompilerDriver& driver, File* elf_file)
-    : ElfWriter(driver, elf_file) {}
-  ~ElfWriterQuick() {}
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
-};
-
-// Explicitly instantiated in elf_writer_quick.cc
-typedef ElfWriterQuick<ElfTypes32> ElfWriterQuick32;
-typedef ElfWriterQuick<ElfTypes64> ElfWriterQuick64;
+std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
+                                                const CompilerOptions* compiler_options,
+                                                File* elf_file);
 
 }  // namespace art
 
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index b413a9e..7cf774e 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -101,7 +101,8 @@
 
     // Encode patch locations.
     std::vector<uint8_t> oat_patches;
-    ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);
+    ElfBuilder<ElfTypes32>::EncodeOatPatches(ArrayRef<const uintptr_t>(patch_locations),
+                                             &oat_patches);
 
     // Create buffer to be patched.
     std::vector<uint8_t> initial_data(256);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6df1527..cda6240 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,9 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
 #include "common_compiler_test.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -32,7 +34,6 @@
 #include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -92,12 +93,37 @@
                            /*compiling_boot_image*/true,
                            &timings,
                            &key_value_store);
-      bool success = writer->PrepareImageAddressSpace() &&
-          compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                     !kIsTargetBuild,
-                                     class_linker->GetBootClassPath(),
-                                     &oat_writer,
-                                     oat_file.GetFile());
+      std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+          compiler_driver_->GetInstructionSet(),
+          &compiler_driver_->GetCompilerOptions(),
+          oat_file.GetFile());
+      bool success = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(success);
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      bool rodata_ok = oat_writer.WriteRodata(rodata);
+      ASSERT_TRUE(rodata_ok);
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      bool text_ok = oat_writer.WriteCode(text);
+      ASSERT_TRUE(text_ok);
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      success = elf_writer->End();
+
       ASSERT_TRUE(success);
     }
   }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3d9e7e7..bf1fcdd 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -330,10 +330,20 @@
 }
 
 void ImageWriter::PrepareDexCacheArraySlots() {
+  // Prepare dex cache array starts based on the ordering specified in the CompilerDriver.
+  uint32_t size = 0u;
+  for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) {
+    dex_cache_array_starts_.Put(dex_file, size);
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
+    size += layout.Size();
+  }
+  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
+  // when AssignImageBinSlot() assigns their indexes out or order.
+  bin_slot_sizes_[kBinDexCacheArray] = size;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *class_linker->DexLock());
-  uint32_t size = 0u;
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
     mirror::DexCache* dex_cache =
         down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
@@ -341,22 +351,18 @@
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
-    dex_cache_array_starts_.Put(dex_file, size);
     DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
+    uint32_t start = dex_cache_array_starts_.Get(dex_file);
     DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset());
     DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset());
     DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
+    AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset());
     DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
-    AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset());
-    size += layout.Size();
+    AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset());
   }
-  // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
-  // when AssignImageBinSlot() assigns their indexes out or order.
-  bin_slot_sizes_[kBinDexCacheArray] = size;
 }
 
 void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) {
@@ -534,7 +540,10 @@
 }
 
 bool ImageWriter::AllocMemory() {
-  const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_,
+  const size_t length = RoundUp(image_objects_offset_begin_ +
+                                    GetBinSizeSum() +
+                                    intern_table_bytes_ +
+                                    class_table_bytes_,
                                 kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image",
@@ -586,6 +595,17 @@
 }
 
 bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  bool early_exit = false;
+  std::unordered_set<mirror::Class*> visited;
+  return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited);
+}
+
+bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal(
+    mirror::Class* klass,
+    bool* early_exit,
+    std::unordered_set<mirror::Class*>* visited) {
+  DCHECK(early_exit != nullptr);
+  DCHECK(visited != nullptr);
   if (klass == nullptr) {
     return false;
   }
@@ -594,14 +614,22 @@
     // Already computed, return the found value.
     return found->second;
   }
-  // Place holder value to prevent infinite recursion.
-  prune_class_memo_.emplace(klass, false);
+  // Circular dependencies, return false but do not store the result in the memoization table.
+  if (visited->find(klass) != visited->end()) {
+    *early_exit = true;
+    return false;
+  }
+  visited->emplace(klass);
   bool result = IsBootClassLoaderNonImageClass(klass);
+  bool my_early_exit = false;  // Only for ourselves, ignore caller.
   if (!result) {
     // Check interfaces since these wont be visited through VisitReferences.)
     mirror::IfTable* if_table = klass->GetIfTable();
     for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-      result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i));
+      result = result || ContainsBootClassLoaderNonImageClassInternal(
+          if_table->GetInterface(i),
+          &my_early_exit,
+          visited);
     }
   }
   // Check static fields and their classes.
@@ -615,16 +643,38 @@
       mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
       if (ref != nullptr) {
         if (ref->IsClass()) {
-          result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass());
+          result = result ||
+                   ContainsBootClassLoaderNonImageClassInternal(
+                       ref->AsClass(),
+                       &my_early_exit,
+                       visited);
         }
-        result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass());
+        result = result ||
+                 ContainsBootClassLoaderNonImageClassInternal(
+                     ref->GetClass(),
+                     &my_early_exit,
+                     visited);
       }
       field_offset = MemberOffset(field_offset.Uint32Value() +
                                   sizeof(mirror::HeapReference<mirror::Object>));
     }
   }
-  result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass());
-  prune_class_memo_[klass] = result;
+  result = result ||
+           ContainsBootClassLoaderNonImageClassInternal(
+               klass->GetSuperClass(),
+               &my_early_exit,
+               visited);
+  // Erase the element we stored earlier since we are exiting the function.
+  auto it = visited->find(klass);
+  DCHECK(it != visited->end());
+  visited->erase(it);
+  // Only store result if it is true or none of the calls early exited due to circular
+  // dependencies. If visited is empty then we are the root caller, in this case the cycle was in
+  // a child call and we can remember the result.
+  if (result == true || !my_early_exit || visited->empty()) {
+    prune_class_memo_[klass] = result;
+  }
+  *early_exit |= my_early_exit;
   return result;
 }
 
@@ -983,6 +1033,14 @@
           WalkFieldsInOrder(value);
         }
       }
+    } else if (h_obj->IsClassLoader()) {
+      // Register the class loader if it has a class table.
+      // The fake boot class loader should not get registered and we should end up with only one
+      // class loader.
+      mirror::ClassLoader* class_loader = h_obj->AsClassLoader();
+      if (class_loader->GetClassTable() != nullptr) {
+        class_loaders_.insert(class_loader);
+      }
     }
   }
 }
@@ -1107,10 +1165,26 @@
   }
 
   // Calculate how big the intern table will be after being serialized.
-  auto* const intern_table = Runtime::Current()->GetInternTable();
+  InternTable* const intern_table = runtime->GetInternTable();
   CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
   intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
 
+  // Write out the class table.
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  if (boot_image_space_ == nullptr) {
+    // Compiling the boot image, add null class loader.
+    class_loaders_.insert(nullptr);
+  }
+  if (!class_loaders_.empty()) {
+    CHECK_EQ(class_loaders_.size(), 1u) << "Should only have one real class loader in the image";
+    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    for (mirror::ClassLoader* loader : class_loaders_) {
+      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+      CHECK(table != nullptr);
+      class_table_bytes_ += table->WriteToMemory(nullptr);
+    }
+  }
+
   // Note that image_end_ is left at end of used mirror object section.
 }
 
@@ -1152,6 +1226,12 @@
   auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
+  // Calculate the size of the class table section.
+  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  *class_table_section = ImageSection(cur_pos, class_table_bytes_);
+  cur_pos = class_table_section->End();
+  // Image end goes right before the start of the image bitmap.
+  const size_t image_end = static_cast<uint32_t>(cur_pos);
   // Finally bitmap section.
   const size_t bitmap_bytes = image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
@@ -1165,7 +1245,6 @@
     }
     LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_;
   }
-  const size_t image_end = static_cast<uint32_t>(interned_strings_section->End());
   CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) <<
       "Oat file should be right after the image.";
   // Create the header.
@@ -1276,23 +1355,48 @@
     }
     image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method);
   }
+  FixupRootVisitor root_visitor(this);
+
   // Write the intern table into the image.
   const ImageSection& intern_table_section = image_header->GetImageSection(
       ImageHeader::kSectionInternedStrings);
-  InternTable* const intern_table = Runtime::Current()->GetInternTable();
-  uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr);
+  Runtime* const runtime = Runtime::Current();
+  InternTable* const intern_table = runtime->GetInternTable();
+  uint8_t* const intern_table_memory_ptr = image_->Begin() + intern_table_section.Offset();
+  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
   // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_table;
+  InternTable temp_intern_table;
   // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
   // the VisitRoots() will update the memory directly rather than the copies.
   // This also relies on visit roots not doing any verification which could fail after we update
   // the roots to be the image addresses.
-  temp_table.ReadFromMemory(memory_ptr);
-  CHECK_EQ(temp_table.Size(), intern_table->Size());
-  FixupRootVisitor visitor(this);
-  temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
+  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
+  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+
+  // Write the class table(s) into the image.
+  ClassLinker* const class_linker = runtime->GetClassLinker();
+  const ImageSection& class_table_section = image_header->GetImageSection(
+      ImageHeader::kSectionClassTable);
+  uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset();
+  ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  size_t class_table_bytes = 0;
+  for (mirror::ClassLoader* loader : class_loaders_) {
+    ClassTable* table = class_linker->ClassTableForClassLoader(loader);
+    CHECK(table != nullptr);
+    uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
+    class_table_bytes += table->WriteToMemory(memory_ptr);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(memory_ptr);
+    // CHECK_EQ(temp_class_table.NumNonZygoteClasses(), table->NumNonZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
+  }
+  CHECK_EQ(class_table_bytes, class_table_bytes_);
 }
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1506,8 +1610,7 @@
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) {
         FixupDexCache(down_cast<mirror::DexCache*>(orig), down_cast<mirror::DexCache*>(copy));
-      } else if (klass->IsSubClass(down_cast<mirror::Class*>(
-          class_linker->GetClassRoot(ClassLinker::kJavaLangClassLoader)))) {
+      } else if (klass->IsClassLoaderClass()) {
         // If src is a ClassLoader, set the class table to null so that it gets recreated by the
         // ClassLoader.
         down_cast<mirror::ClassLoader*>(copy)->SetClassTable(nullptr);
@@ -1793,7 +1896,8 @@
                                 bin_slot_sizes_[kBinArtMethodDirty] +
                                 bin_slot_sizes_[kBinArtMethodClean] +
                                 bin_slot_sizes_[kBinDexCacheArray] +
-                                intern_table_bytes_;
+                                intern_table_bytes_ +
+                                class_table_bytes_;
   return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize);
 }
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 22cb91a..386838f 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -72,7 +72,8 @@
         intern_table_bytes_(0u),
         image_method_array_(ImageHeader::kImageMethodsCount),
         dirty_methods_(0u),
-        clean_methods_(0u) {
+        clean_methods_(0u),
+        class_table_bytes_(0u) {
     CHECK_NE(image_begin, 0U);
     std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
     std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0);
@@ -343,6 +344,12 @@
   bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // early_exit is true if we had a cyclic dependency anywhere down the chain.
+  bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass,
+                                                    bool* early_exit,
+                                                    std::unordered_set<mirror::Class*>* visited)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type);
 
   uintptr_t NativeOffsetInImage(void* obj);
@@ -447,6 +454,12 @@
   // Prune class memoization table to speed up ContainsBootClassLoaderNonImageClass.
   std::unordered_map<mirror::Class*, bool> prune_class_memo_;
 
+  // Class loaders with a class table to write out. Should only be one currently.
+  std::unordered_set<mirror::ClassLoader*> class_loaders_;
+
+  // Number of image class table bytes.
+  size_t class_table_bytes_;
+
   friend class ContainsBootClassLoaderNonImageClassVisitor;
   friend class FixupClassVisitor;
   friend class FixupRootVisitor;
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 2125c9a..d001495 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -170,18 +170,6 @@
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Check if the method is already compiled.
-  if (runtime->GetJit()->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
-    VLOG(jit) << "Already compiled " << PrettyMethod(method);
-    return true;
-  }
-
-  // Don't compile the method if we are supposed to be deoptimized.
-  instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation();
-  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    return false;
-  }
-
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
@@ -190,13 +178,13 @@
   }
 
   // Do the compilation.
-  JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
   bool success = false;
   {
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     // If we get a request to compile a proxy method, we pass the actual Java method
     // of that proxy method, as the compiler does not expect a proxy method.
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
+    JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
   }
 
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 13754fd..73b0fac 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,9 @@
 #include "linker/arm/relative_patcher_arm_base.h"
 
 #include "compiled_method.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 57018af..3d4c218 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,10 +20,10 @@
 #include "art_method.h"
 #include "compiled_method.h"
 #include "driver/compiler_driver.h"
-#include "utils/arm64/assembler_arm64.h"
+#include "linker/output_stream.h"
 #include "oat.h"
 #include "oat_quick_method_header.h"
-#include "output_stream.h"
+#include "utils/arm64/assembler_arm64.h"
 
 namespace art {
 namespace linker {
diff --git a/compiler/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc
similarity index 76%
rename from compiler/buffered_output_stream.cc
rename to compiler/linker/buffered_output_stream.cc
index 3ca518b..4c66c76 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/linker/buffered_output_stream.cc
@@ -20,18 +20,24 @@
 
 namespace art {
 
-BufferedOutputStream::BufferedOutputStream(OutputStream* out)
-    : OutputStream(out->GetLocation()), out_(out), used_(0) {}
+BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out)
+    : OutputStream(out->GetLocation()),  // Before out is moved to out_.
+      out_(std::move(out)),
+      used_(0) {}
+
+BufferedOutputStream::~BufferedOutputStream() {
+  FlushBuffer();
+}
 
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    if (!Flush()) {
+    if (!FlushBuffer()) {
       return false;
     }
   }
@@ -42,6 +48,10 @@
 }
 
 bool BufferedOutputStream::Flush() {
+  return FlushBuffer() && out_->Flush();
+}
+
+bool BufferedOutputStream::FlushBuffer() {
   bool success = true;
   if (used_ > 0) {
     success = out_->WriteFully(&buffer_[0], used_);
@@ -51,7 +61,7 @@
 }
 
 off_t BufferedOutputStream::Seek(off_t offset, Whence whence) {
-  if (!Flush()) {
+  if (!FlushBuffer()) {
     return -1;
   }
   return out_->Seek(offset, whence);
diff --git a/compiler/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h
similarity index 65%
rename from compiler/buffered_output_stream.h
rename to compiler/linker/buffered_output_stream.h
index b447f41..a2eefbb 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/linker/buffered_output_stream.h
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
-#define ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
+
+#include <memory>
 
 #include "output_stream.h"
 
@@ -25,26 +27,23 @@
 
 class BufferedOutputStream FINAL : public OutputStream {
  public:
-  explicit BufferedOutputStream(OutputStream* out);
+  explicit BufferedOutputStream(std::unique_ptr<OutputStream> out);
 
-  virtual ~BufferedOutputStream() {
-    Flush();
-    delete out_;
-  }
+  ~BufferedOutputStream() OVERRIDE;
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
 
-  bool Flush();
+  bool Flush() OVERRIDE;
 
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  OutputStream* const out_;
+  bool FlushBuffer();
 
+  std::unique_ptr<OutputStream> const out_;
   uint8_t buffer_[kBufferSize];
-
   size_t used_;
 
   DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream);
@@ -52,4 +51,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_BUFFERED_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_
diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h
new file mode 100644
index 0000000..99410e4
--- /dev/null
+++ b/compiler/linker/error_delaying_output_stream.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
+
+#include "output_stream.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+// OutputStream wrapper that delays reporting an error until Flush().
+class ErrorDelayingOutputStream FINAL : public OutputStream {
+ public:
+  explicit ErrorDelayingOutputStream(OutputStream* output)
+      : OutputStream(output->GetLocation()),
+        output_(output),
+        output_good_(true),
+        output_offset_(0) { }
+
+  // This function always succeeds to simplify code.
+  // Use Good() to check the actual status of the output stream.
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to " << GetLocation() << " at offset " << output_offset_;
+        output_good_ = false;
+      }
+    }
+    output_offset_ += byte_count;
+    return true;
+  }
+
+  // This function always succeeds to simplify code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) OVERRIDE {
+    // We keep shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == static_cast<off_t>(-1)) {
+        PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
+    }
+    output_offset_ = new_offset;
+    return new_offset;
+  }
+
+  // Flush the output and return whether all operations have succeeded.
+  // Do nothing if we already have a pending error.
+  bool Flush() OVERRIDE {
+    if (output_good_) {
+      output_good_ = output_->Flush();
+    }
+    return output_good_;
+  }
+
+  // Check (without flushing) whether all operations have succeeded so far.
+  bool Good() const {
+    return output_good_;
+  }
+
+ private:
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_
diff --git a/compiler/file_output_stream.cc b/compiler/linker/file_output_stream.cc
similarity index 94%
rename from compiler/file_output_stream.cc
rename to compiler/linker/file_output_stream.cc
index 3ee16f5..bbfbdfd 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/linker/file_output_stream.cc
@@ -33,4 +33,8 @@
   return lseek(file_->Fd(), offset, static_cast<int>(whence));
 }
 
+bool FileOutputStream::Flush() {
+  return file_->Flush() == 0;
+}
+
 }  // namespace art
diff --git a/compiler/file_output_stream.h b/compiler/linker/file_output_stream.h
similarity index 72%
rename from compiler/file_output_stream.h
rename to compiler/linker/file_output_stream.h
index 9dfbd7f..f2d8453 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/linker/file_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_FILE_OUTPUT_STREAM_H_
-#define ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -27,11 +27,13 @@
  public:
   explicit FileOutputStream(File* file);
 
-  virtual ~FileOutputStream() {}
+  ~FileOutputStream() OVERRIDE {}
 
-  virtual bool WriteFully(const void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE;
 
-  virtual off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE;
 
  private:
   File* const file_;
@@ -41,4 +43,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_FILE_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream.cc b/compiler/linker/output_stream.cc
similarity index 100%
rename from compiler/output_stream.cc
rename to compiler/linker/output_stream.cc
diff --git a/compiler/output_stream.h b/compiler/linker/output_stream.h
similarity index 76%
rename from compiler/output_stream.h
rename to compiler/linker/output_stream.h
index 4d30b83..96a5f48 100644
--- a/compiler/output_stream.h
+++ b/compiler/linker/output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OUTPUT_STREAM_H_
-#define ART_COMPILER_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_
 
 #include <ostream>
 #include <string>
@@ -45,6 +45,14 @@
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
+  /*
+   * Flushes the stream. Returns whether the operation was successful.
+   *
+   * An OutputStream may delay reporting errors from WriteFully() or
+   * Seek(). In that case, Flush() shall report any pending error.
+   */
+  virtual bool Flush() = 0;
+
  private:
   const std::string location_;
 
@@ -53,4 +61,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_OUTPUT_STREAM_H_
diff --git a/compiler/output_stream_test.cc b/compiler/linker/output_stream_test.cc
similarity index 71%
rename from compiler/output_stream_test.cc
rename to compiler/linker/output_stream_test.cc
index 6104ccd..84c76f2 100644
--- a/compiler/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -19,6 +19,7 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -48,6 +49,7 @@
     EXPECT_TRUE(output_stream_->WriteFully(buf, 4));
     CheckOffset(10);
     EXPECT_TRUE(output_stream_->WriteFully(buf, 6));
+    EXPECT_TRUE(output_stream_->Flush());
   }
 
   void CheckTestOutput(const std::vector<uint8_t>& actual) {
@@ -77,9 +79,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    std::unique_ptr<FileOutputStream> file_output_stream(new FileOutputStream(tmp.GetFile()));
-    CHECK(file_output_stream.get() != nullptr);
-    BufferedOutputStream buffered_output_stream(file_output_stream.release());
+    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -99,4 +99,39 @@
   CheckTestOutput(output);
 }
 
+TEST_F(OutputStreamTest, BufferedFlush) {
+  struct CheckingOutputStream : OutputStream {
+    CheckingOutputStream()
+        : OutputStream("dummy"),
+          flush_called(false) { }
+    ~CheckingOutputStream() OVERRIDE {}
+
+    bool WriteFully(const void* buffer ATTRIBUTE_UNUSED,
+                    size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE {
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
+    }
+
+    bool Flush() OVERRIDE {
+      flush_called = true;
+      return true;
+    }
+
+    bool flush_called;
+  };
+
+  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  CheckingOutputStream* checking_output_stream = cos.get();
+  BufferedOutputStream buffered(std::move(cos));
+  ASSERT_FALSE(checking_output_stream->flush_called);
+  bool flush_result = buffered.Flush();
+  ASSERT_TRUE(flush_result);
+  ASSERT_TRUE(checking_output_stream->flush_called);
+}
+
 }  // namespace art
diff --git a/compiler/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc
similarity index 94%
rename from compiler/vector_output_stream.cc
rename to compiler/linker/vector_output_stream.cc
index 3d33673..f758005 100644
--- a/compiler/vector_output_stream.cc
+++ b/compiler/linker/vector_output_stream.cc
@@ -21,7 +21,7 @@
 namespace art {
 
 VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector)
-  : OutputStream(location), offset_(vector->size()), vector_(vector) {}
+    : OutputStream(location), offset_(vector->size()), vector_(vector) {}
 
 off_t VectorOutputStream::Seek(off_t offset, Whence whence) {
   CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence;
diff --git a/compiler/vector_output_stream.h b/compiler/linker/vector_output_stream.h
similarity index 81%
rename from compiler/vector_output_stream.h
rename to compiler/linker/vector_output_stream.h
index 3c5877c..3210143 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/linker/vector_output_stream.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
-#define ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
+#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
 
 #include "output_stream.h"
 
@@ -29,9 +29,9 @@
  public:
   VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector);
 
-  virtual ~VectorOutputStream() {}
+  ~VectorOutputStream() OVERRIDE {}
 
-  bool WriteFully(const void* buffer, size_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
     if (static_cast<size_t>(offset_) == vector_->size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_->insert(vector_->end(), &start[0], &start[byte_count]);
@@ -45,7 +45,11 @@
     return true;
   }
 
-  off_t Seek(off_t offset, Whence whence);
+  off_t Seek(off_t offset, Whence whence) OVERRIDE;
+
+  bool Flush() OVERRIDE {
+    return true;
+  }
 
  private:
   void EnsureCapacity(off_t new_offset) {
@@ -62,4 +66,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_VECTOR_OUTPUT_STREAM_H_
+#endif  // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 030451c..b8610d0 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -27,14 +27,17 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "linker/vector_output_stream.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
-#include "vector_output_stream.h"
 
 namespace art {
 
@@ -134,11 +137,36 @@
                          /*compiling_boot_image*/false,
                          &timings,
                          &key_value_store);
-    return compiler_driver_->WriteElf(GetTestAndroidRoot(),
-                                      !kIsTargetBuild,
-                                      dex_files,
-                                      &oat_writer,
-                                      file);
+    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
+        compiler_driver_->GetInstructionSet(),
+        &compiler_driver_->GetCompilerOptions(),
+        file);
+
+    elf_writer->Start();
+
+    OutputStream* rodata = elf_writer->StartRoData();
+    if (!oat_writer.WriteRodata(rodata)) {
+      return false;
+    }
+    elf_writer->EndRoData(rodata);
+
+    OutputStream* text = elf_writer->StartText();
+    if (!oat_writer.WriteCode(text)) {
+      return false;
+    }
+    elf_writer->EndText(text);
+
+    elf_writer->SetBssSize(oat_writer.GetBssSize());
+
+    elf_writer->WriteDynamicSection();
+
+    ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer.GetMethodDebugInfo());
+    elf_writer->WriteDebugInfo(method_infos);
+
+    ArrayRef<const uintptr_t> patch_locations(oat_writer.GetAbsolutePatchLocations());
+    elf_writer->WritePatchLocations(patch_locations);
+
+    return elf_writer->End();
   }
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 40a3f14..e8e775f 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -31,10 +31,12 @@
 #include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
 #include "image_writer.h"
+#include "linker/output_stream.h"
 #include "linker/relative_patcher.h"
 #include "mirror/array.h"
 #include "mirror/class_loader.h"
@@ -42,7 +44,6 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "type_lookup_table.h"
@@ -485,7 +486,7 @@
         // Record debug information for this function if we are doing that.
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        writer_->method_info_.push_back(DebugInfo {
+        writer_->method_info_.push_back(dwarf::MethodDebugInfo {
             dex_file_,
             class_def_index_,
             it.GetMemberIndex(),
@@ -640,8 +641,12 @@
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
         Thread::Current(), *dex_file_)));
-    ArtMethod* method = linker->ResolveMethod(
-        *dex_file_, it.GetMemberIndex(), dex_cache, NullHandle<mirror::ClassLoader>(), nullptr,
+    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+        *dex_file_,
+        it.GetMemberIndex(),
+        dex_cache,
+        NullHandle<mirror::ClassLoader>(),
+        nullptr,
         invoke_type);
     if (method == nullptr) {
       LOG(INTERNAL_FATAL) << "Unexpected failure to resolve a method: "
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 7027434..6c46ebc 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -38,6 +38,10 @@
 class TimingLogger;
 class TypeLookupTable;
 
+namespace dwarf {
+struct MethodDebugInfo;
+}  // namespace dwarf
+
 // OatHeader         variable length with count of D OatDexFiles
 //
 // OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
@@ -129,19 +133,7 @@
 
   ~OatWriter();
 
-  struct DebugInfo {
-    const DexFile* dex_file_;
-    size_t class_def_index_;
-    uint32_t dex_method_index_;
-    uint32_t access_flags_;
-    const DexFile::CodeItem *code_item_;
-    bool deduped_;
-    uint32_t low_pc_;
-    uint32_t high_pc_;
-    CompiledMethod* compiled_method_;
-  };
-
-  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
+  const std::vector<dwarf::MethodDebugInfo>& GetMethodDebugInfo() const {
     return method_info_;
   }
 
@@ -280,7 +272,7 @@
     DISALLOW_COPY_AND_ASSIGN(OatClass);
   };
 
-  std::vector<DebugInfo> method_info_;
+  std::vector<dwarf::MethodDebugInfo> method_info_;
 
   const CompilerDriver* const compiler_driver_;
   ImageWriter* const image_writer_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index cca0baf..4c3f66a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
 
 #include "base/arena_containers.h"
 #include "induction_var_range.h"
+#include "side_effects_analysis.h"
 #include "nodes.h"
 
 namespace art {
@@ -175,6 +176,24 @@
     return false;
   }
 
+  // Returns if it's certain this->bound > `bound`.
+  bool GreaterThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ > bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
+  // Returns if it's certain this->bound < `bound`.
+  bool LessThan(ValueBound bound) const {
+    if (Equal(instruction_, bound.instruction_)) {
+      return constant_ < bound.constant_;
+    }
+    // Not comparable. Just return false.
+    return false;
+  }
+
   // Try to narrow lower bound. Returns the greatest of the two if possible.
   // Pick one if they are not comparable.
   static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
@@ -252,157 +271,6 @@
   int32_t constant_;
 };
 
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
-  explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
-      : induction_variable_(induction_variable),
-        found_array_length_(nullptr),
-        offset_low_(std::numeric_limits<int32_t>::max()),
-        offset_high_(std::numeric_limits<int32_t>::min()) {
-    Run();
-  }
-
-  HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
-  bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
-  int32_t GetOffsetLow() const { return offset_low_; }
-  int32_t GetOffsetHigh() const { return offset_high_; }
-
-  // Returns if `block` that is in loop_info may exit the loop, unless it's
-  // the loop header for loop_info.
-  static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
-    DCHECK(loop_info->Contains(*block));
-    if (block == loop_info->GetHeader()) {
-      // Loop header of loop_info. Exiting loop is normal.
-      return false;
-    }
-    for (HBasicBlock* successor : block->GetSuccessors()) {
-      if (!loop_info->Contains(*successor)) {
-        // One of the successors exits the loop.
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
-    for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
-      if (!block->Dominates(back_edge)) {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  void Run() {
-    HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
-    HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
-    HBasicBlock* block = it_loop.Current();
-    DCHECK(block == induction_variable_->GetBlock());
-    // Skip loop header. Since narrowed value range of a MonotonicValueRange only
-    // applies to the loop body (after the test at the end of the loop header).
-    it_loop.Advance();
-    for (; !it_loop.Done(); it_loop.Advance()) {
-      block = it_loop.Current();
-      DCHECK(block->IsInLoop());
-      if (!DominatesAllBackEdges(block, loop_info)) {
-        // In order not to trigger deoptimization unnecessarily, make sure
-        // that all array accesses collected are really executed in the loop.
-        // For array accesses in a branch inside the loop, don't collect the
-        // access. The bounds check in that branch might not be eliminated.
-        continue;
-      }
-      if (EarlyExit(block, loop_info)) {
-        // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
-        // that the loop will loop through the full monotonic value range from
-        // initial_ to end_. So adding deoptimization might be too aggressive and can
-        // trigger deoptimization unnecessarily even if the loop won't actually throw
-        // AIOOBE.
-        found_array_length_ = nullptr;
-        return;
-      }
-      for (HInstruction* instruction = block->GetFirstInstruction();
-           instruction != nullptr;
-           instruction = instruction->GetNext()) {
-        if (!instruction->IsBoundsCheck()) {
-          continue;
-        }
-
-        HInstruction* length_value = instruction->InputAt(1);
-        if (length_value->IsIntConstant()) {
-          // TODO: may optimize for constant case.
-          continue;
-        }
-
-        if (length_value->IsPhi()) {
-          // When adding deoptimizations in outer loops, we might create
-          // a phi for the array length, and update all uses of the
-          // length in the loop to that phi. Therefore, inner loops having
-          // bounds checks on the same array will use that phi.
-          // TODO: handle these cases.
-          continue;
-        }
-
-        DCHECK(length_value->IsArrayLength());
-        HArrayLength* array_length = length_value->AsArrayLength();
-
-        HInstruction* array = array_length->InputAt(0);
-        if (array->IsNullCheck()) {
-          array = array->AsNullCheck()->InputAt(0);
-        }
-        if (loop_info->Contains(*array->GetBlock())) {
-          // Array is defined inside the loop. Skip.
-          continue;
-        }
-
-        if (found_array_length_ != nullptr && found_array_length_ != array_length) {
-          // There is already access for another array recorded for the loop.
-          // TODO: handle multiple arrays.
-          continue;
-        }
-
-        HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
-        HInstruction* left = index;
-        int32_t right = 0;
-        if (left == induction_variable_ ||
-            (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
-             left == induction_variable_)) {
-          // For patterns like array[i] or array[i + 2].
-          if (right < offset_low_) {
-            offset_low_ = right;
-          }
-          if (right > offset_high_) {
-            offset_high_ = right;
-          }
-        } else {
-          // Access not in induction_variable/(induction_variable_ + constant)
-          // format. Skip.
-          continue;
-        }
-        // Record this array.
-        found_array_length_ = array_length;
-      }
-    }
-  }
-
- private:
-  // The instruction that corresponds to a MonotonicValueRange.
-  HInstruction* induction_variable_;
-
-  // The array length of the array that's accessed inside the loop body.
-  HArrayLength* found_array_length_;
-
-  // The lowest and highest constant offsets relative to induction variable
-  // instruction_ in all array accesses.
-  // If array access are: array[i-1], array[i], array[i+1],
-  // offset_low_ is -1 and offset_high is 1.
-  int32_t offset_low_;
-  int32_t offset_high_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
 /**
  * Represent a range of lower bound and upper bound, both being inclusive.
  * Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@
       : ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
         induction_variable_(induction_variable),
         initial_(initial),
-        end_(nullptr),
-        inclusive_(false),
         increment_(increment),
         bound_(bound) {}
 
   virtual ~MonotonicValueRange() {}
 
-  HInstruction* GetInductionVariable() const { return induction_variable_; }
   int32_t GetIncrement() const { return increment_; }
   ValueBound GetBound() const { return bound_; }
-  void SetEnd(HInstruction* end) { end_ = end; }
-  void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
   HBasicBlock* GetLoopHeader() const {
     DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
     return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@
 
   MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
 
-  HBasicBlock* GetLoopHeaderSuccesorInLoop() {
-    HBasicBlock* header = GetLoopHeader();
-    HInstruction* instruction = header->GetLastInstruction();
-    DCHECK(instruction->IsIf());
-    HIf* h_if = instruction->AsIf();
-    HLoopInformation* loop_info = header->GetLoopInformation();
-    bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
-    bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
-    // Just in case it's some strange loop structure.
-    if (true_successor_in_loop && false_successor_in_loop) {
-      return nullptr;
-    }
-    DCHECK(true_successor_in_loop || false_successor_in_loop);
-    return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
-  }
-
   // If it's certain that this value range fits in other_range.
   bool FitsIn(ValueRange* other_range) const OVERRIDE {
     if (other_range == nullptr) {
@@ -627,467 +473,9 @@
     }
   }
 
-  // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
-  // For example, this loop:
-  //
-  //   for (int i = start; i < end; i++) {
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // will be transformed to:
-  //
-  //   int array_length_in_loop_body_if_needed;
-  //   if (start >= end) {
-  //     array_length_in_loop_body_if_needed = 0;
-  //   } else {
-  //     if (start < 1) deoptimize();
-  //     if (array == null) deoptimize();
-  //     array_length = array.length;
-  //     if (end > array_length - 1) deoptimize;
-  //     array_length_in_loop_body_if_needed = array_length;
-  //   }
-  //   for (int i = start; i < end; i++) {
-  //     // No more null check and bounds check.
-  //     // array.length value is replaced with array_length_in_loop_body_if_needed
-  //     // in the loop body.
-  //     array[i - 1] = array[i] + array[i + 1];
-  //   }
-  //
-  // We basically first go through the loop body and find those array accesses whose
-  // index is at a constant offset from the induction variable ('i' in the above example),
-  // and update offset_low and offset_high along the way. We then add the following
-  // deoptimizations in the loop pre-header (suppose end is not inclusive).
-  //   if (start < -offset_low) deoptimize();
-  //   if (end >= array.length - offset_high) deoptimize();
-  // It might be necessary to first hoist array.length (and the null check on it) out of
-  // the loop with another deoptimization.
-  //
-  // In order not to trigger deoptimization unnecessarily, we want to make a strong
-  // guarantee that no deoptimization is triggered if the loop body itself doesn't
-  // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
-  // body must throw AIOOBE).
-  // This is achieved by the following:
-  // 1) We only process loops that iterate through the full monotonic range from
-  //    initial_ to end_. We do the following checks to make sure that's the case:
-  //    a) The loop doesn't have early exit (via break, return, etc.)
-  //    b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
-  // 2) We only collect array accesses of blocks in the loop body that dominate
-  //    all loop back edges, these array accesses are guaranteed to happen
-  //    at each loop iteration.
-  // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
-  // when the induction variable is at initial_ and end_ must be in a legal range.
-  // Since the added deoptimizations are basically checking the induction variable
-  // at initial_ and end_ values, no deoptimization will be triggered either.
-  //
-  // A special case is the loop body isn't entered at all. In that case, we may still
-  // add deoptimization due to the analysis described above. In order not to trigger
-  // deoptimization, we do a test between initial_ and end_ first and skip over
-  // the added deoptimization.
-  ValueRange* NarrowWithDeoptimization() {
-    if (increment_ != 1 && increment_ != -1) {
-      // In order not to trigger deoptimization unnecessarily, we want to
-      // make sure the loop iterates through the full range from initial_ to
-      // end_ so that boundaries are covered by the loop. An increment of 2,
-      // for example, may skip end_.
-      return this;
-    }
-
-    if (end_ == nullptr) {
-      // No full info to add deoptimization.
-      return this;
-    }
-
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't add a check in loop pre-header if the value isn't available there.
-      return this;
-    }
-
-    ArrayAccessInsideLoopFinder finder(induction_variable_);
-
-    if (!finder.HasFoundArrayLength()) {
-      // No array access was found inside the loop that can benefit
-      // from deoptimization.
-      return this;
-    }
-
-    if (!AddDeoptimization(finder)) {
-      return this;
-    }
-
-    // After added deoptimizations, induction variable fits in
-    // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
-    ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
-    ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
-    // We've narrowed the range after added deoptimizations.
-    return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
-  }
-
-  // Returns true if adding a (constant >= value) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    // See if we can prove the relationship first.
-    if (value->IsIntConstant()) {
-      if (value->AsIntConstant()->GetValue() >= constant) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Try to filter out cases that the loop entry test will never be true.
-  bool LoopEntryTestUseful() {
-    if (initial_->IsIntConstant() && end_->IsIntConstant()) {
-      int32_t initial_val = initial_->AsIntConstant()->GetValue();
-      int32_t end_val = end_->AsIntConstant()->GetValue();
-      if (increment_ == 1) {
-        if (inclusive_) {
-          return initial_val > end_val;
-        } else {
-          return initial_val >= end_val;
-        }
-      } else {
-        DCHECK_EQ(increment_, -1);
-        if (inclusive_) {
-          return initial_val < end_val;
-        } else {
-          return initial_val <= end_val;
-        }
-      }
-    }
-    return true;
-  }
-
-  // Returns the block for adding deoptimization.
-  HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    // Deoptimization is only added when both initial_ and end_ are defined
-    // before the loop.
-    DCHECK(initial_->GetBlock()->Dominates(pre_header));
-    DCHECK(end_->GetBlock()->Dominates(pre_header));
-
-    // If it can be proven the loop body is definitely entered (unless exception
-    // is thrown in the loop header for which triggering deoptimization is fine),
-    // there is no need for tranforming the loop. In that case, deoptimization
-    // will just be added in the loop pre-header.
-    if (!LoopEntryTestUseful()) {
-      return pre_header;
-    }
-
-    HGraph* graph = header->GetGraph();
-    graph->TransformLoopHeaderForBCE(header);
-    HBasicBlock* new_pre_header = header->GetDominator();
-    DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
-    HBasicBlock* if_block = new_pre_header->GetDominator();
-    HBasicBlock* dummy_block = if_block->GetSuccessors()[0];  // True successor.
-    HBasicBlock* deopt_block = if_block->GetSuccessors()[1];  // False successor.
-
-    dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
-    deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
-    new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
-    return deopt_block;
-  }
-
-  // Adds a test between initial_ and end_ to see if the loop body is entered.
-  // If the loop body isn't entered at all, it jumps to the loop pre-header (after
-  // transformation) to avoid any deoptimization.
-  void AddLoopBodyEntryTest() {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    HBasicBlock* if_block = pre_header->GetDominator();
-    HGraph* graph = header->GetGraph();
-
-    HCondition* cond;
-    if (increment_ == 1) {
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
-      }
-    } else {
-      DCHECK_EQ(increment_, -1);
-      if (inclusive_) {
-        cond = new (graph->GetArena()) HLessThan(initial_, end_);
-      } else {
-        cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
-      }
-    }
-    HIf* h_if = new (graph->GetArena()) HIf(cond);
-    if_block->AddInstruction(cond);
-    if_block->AddInstruction(h_if);
-  }
-
-  // Adds a check that (value >= constant), and HDeoptimize otherwise.
-  void AddDeoptimizationConstant(HInstruction* value,
-                                 int32_t constant,
-                                 HBasicBlock* deopt_block,
-                                 bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-    if (loop_entry_test_block_added) {
-      DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
-    }
-
-    HIntConstant* const_instr = graph->GetIntConstant(constant);
-    HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
-    HDeoptimize* deoptimize = new (graph->GetArena())
-        HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
-    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-        suspend_check->GetEnvironment(), header);
-  }
-
-  // Returns true if adding a (value <= array_length + offset) check for deoptimization
-  // is allowed and will benefit compiled code.
-  bool CanAddDeoptimizationArrayLength(HInstruction* value,
-                                       HArrayLength* array_length,
-                                       int32_t offset,
-                                       bool* is_proven) {
-    *is_proven = false;
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
-    DCHECK(value->GetBlock()->Dominates(pre_header));
-
-    if (array_length->GetBlock() == header) {
-      // array_length_in_loop_body_if_needed only has correct value when the loop
-      // body is entered. We bail out in this case. Usually array_length defined
-      // in the loop header is already hoisted by licm.
-      return false;
-    } else {
-      // array_length is defined either before the loop header already, or in
-      // the loop body since it's used in the loop body. If it's defined in the loop body,
-      // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
-      // all the uses of array_length must be dominated by its definition in the loop
-      // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
-      // array_length once the loop body is entered so all the uses of the phi will
-      // use the correct value.
-    }
-
-    if (offset > 0) {
-      // There might be overflow issue.
-      // TODO: handle this, possibly with some distance relationship between
-      // offset_low and offset_high, or using another deoptimization to make
-      // sure (array_length + offset) doesn't overflow.
-      return false;
-    }
-
-    // See if we can prove the relationship first.
-    if (value == array_length) {
-      if (offset >= 0) {
-        // Already true.
-        *is_proven = true;
-        return true;
-      } else {
-        // May throw exception. Don't add deoptimization.
-        // Keep bounds checks in the loops.
-        return false;
-      }
-    }
-    // Can benefit from deoptimization.
-    return true;
-  }
-
-  // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
-  void AddDeoptimizationArrayLength(HInstruction* value,
-                                    HArrayLength* array_length,
-                                    int32_t offset,
-                                    HBasicBlock* deopt_block,
-                                    bool loop_entry_test_block_added) {
-    HBasicBlock* header = induction_variable_->GetBlock();
-    DCHECK(header->IsLoopHeader());
-    HBasicBlock* pre_header = header->GetDominator();
-    if (loop_entry_test_block_added) {
-      DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
-    } else {
-      DCHECK(deopt_block == pre_header);
-    }
-    HGraph* graph = header->GetGraph();
-    HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
-    // We may need to hoist null-check and array_length out of loop first.
-    if (!array_length->GetBlock()->Dominates(deopt_block)) {
-      // array_length must be defined in the loop body.
-      DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
-      DCHECK(array_length->GetBlock() != header);
-
-      HInstruction* array = array_length->InputAt(0);
-      HNullCheck* null_check = array->AsNullCheck();
-      if (null_check != nullptr) {
-        array = null_check->InputAt(0);
-      }
-      // We've already made sure the array is defined before the loop when collecting
-      // array accesses for the loop.
-      DCHECK(array->GetBlock()->Dominates(deopt_block));
-      if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
-        // Hoist null check out of loop with a deoptimization.
-        HNullConstant* null_constant = graph->GetNullConstant();
-        HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
-        // TODO: for one dex_pc, share the same deoptimization slow path.
-        HDeoptimize* null_check_deoptimize = new (graph->GetArena())
-            HDeoptimize(null_check_cond, suspend_check->GetDexPc());
-        deopt_block->InsertInstructionBefore(
-            null_check_cond, deopt_block->GetLastInstruction());
-        deopt_block->InsertInstructionBefore(
-            null_check_deoptimize, deopt_block->GetLastInstruction());
-        // Eliminate null check in the loop.
-        null_check->ReplaceWith(array);
-        null_check->GetBlock()->RemoveInstruction(null_check);
-        null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
-            suspend_check->GetEnvironment(), header);
-      }
-
-      HArrayLength* new_array_length
-          = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
-      deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
-      if (loop_entry_test_block_added) {
-        // Replace array_length defined inside the loop body with a phi
-        // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
-        // no vreg number for it.
-        HPhi* phi = new (graph->GetArena()) HPhi(
-            graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
-        // Set to 0 if the loop body isn't entered.
-        phi->SetRawInputAt(0, graph->GetIntConstant(0));
-        // Set to array.length if the loop body is entered.
-        phi->SetRawInputAt(1, new_array_length);
-        pre_header->AddPhi(phi);
-        array_length->ReplaceWith(phi);
-        // Make sure phi is only used after the loop body is entered.
-        if (kIsDebugBuild) {
-          for (HUseIterator<HInstruction*> it(phi->GetUses());
-               !it.Done();
-               it.Advance()) {
-            HInstruction* user = it.Current()->GetUser();
-            DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
-          }
-        }
-      } else {
-        array_length->ReplaceWith(new_array_length);
-      }
-
-      array_length->GetBlock()->RemoveInstruction(array_length);
-      // Use new_array_length for deopt.
-      array_length = new_array_length;
-    }
-
-    HInstruction* added = array_length;
-    if (offset != 0) {
-      HIntConstant* offset_instr = graph->GetIntConstant(offset);
-      added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
-      deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
-    }
-    HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
-    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
-    deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
-    deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
-    deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
-  }
-
-  // Adds deoptimizations in loop pre-header with the collected array access
-  // data so that value ranges can be established in loop body.
-  // Returns true if deoptimizations are successfully added, or if it's proven
-  // it's not necessary.
-  bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
-    int32_t offset_low = finder.GetOffsetLow();
-    int32_t offset_high = finder.GetOffsetHigh();
-    HArrayLength* array_length = finder.GetFoundArrayLength();
-
-    HBasicBlock* pre_header =
-        induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
-    if (!initial_->GetBlock()->Dominates(pre_header) ||
-        !end_->GetBlock()->Dominates(pre_header)) {
-      // Can't move initial_ or end_ into pre_header for comparisons.
-      return false;
-    }
-
-    HBasicBlock* deopt_block;
-    bool loop_entry_test_block_added = false;
-    bool is_constant_proven, is_length_proven;
-
-    HInstruction* const_comparing_instruction;
-    int32_t const_compared_to;
-    HInstruction* array_length_comparing_instruction;
-    int32_t array_length_offset;
-    if (increment_ == 1) {
-      // Increasing from initial_ to end_.
-      const_comparing_instruction = initial_;
-      const_compared_to = -offset_low;
-      array_length_comparing_instruction = end_;
-      array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
-    } else {
-      const_comparing_instruction = end_;
-      const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
-      array_length_comparing_instruction = initial_;
-      array_length_offset = -offset_high - 1;
-    }
-
-    if (CanAddDeoptimizationConstant(const_comparing_instruction,
-                                     const_compared_to,
-                                     &is_constant_proven) &&
-        CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                        array_length,
-                                        array_length_offset,
-                                        &is_length_proven)) {
-      if (!is_constant_proven || !is_length_proven) {
-        deopt_block = TransformLoopForDeoptimizationIfNeeded();
-        loop_entry_test_block_added = (deopt_block != pre_header);
-        if (loop_entry_test_block_added) {
-          // Loop body may be entered.
-          AddLoopBodyEntryTest();
-        }
-      }
-      if (!is_constant_proven) {
-        AddDeoptimizationConstant(const_comparing_instruction,
-                                  const_compared_to,
-                                  deopt_block,
-                                  loop_entry_test_block_added);
-      }
-      if (!is_length_proven) {
-        AddDeoptimizationArrayLength(array_length_comparing_instruction,
-                                     array_length,
-                                     array_length_offset,
-                                     deopt_block,
-                                     loop_entry_test_block_added);
-      }
-      return true;
-    }
-    return false;
-  }
-
  private:
   HPhi* const induction_variable_;  // Induction variable for this monotonic value range.
   HInstruction* const initial_;     // Initial value.
-  HInstruction* end_;               // End value.
-  bool inclusive_;                  // Whether end value is inclusive.
   const int32_t increment_;         // Increment for each loop iteration.
   const ValueBound bound_;          // Additional value bound info for initial_.
 
@@ -1111,7 +499,9 @@
     return block->GetBlockId() >= initial_block_size_;
   }
 
-  BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BCEVisitor(HGraph* graph,
+             const SideEffectsAnalysis& side_effects,
+             HInductionVarAnalysis* induction_analysis)
       : HGraphVisitor(graph),
         maps_(graph->GetBlocks().size(),
               ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@
         first_constant_index_bounds_check_map_(
             std::less<int>(),
             graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        early_exit_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        taken_test_loop_(
+            std::less<uint32_t>(),
+            graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+        finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
         need_to_revisit_block_(false),
+        has_deoptimization_on_constant_subscripts_(false),
         initial_block_size_(graph->GetBlocks().size()),
+        side_effects_(side_effects),
         induction_range_(induction_analysis) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@
     }
   }
 
+  void Finish() {
+    // Preserve SSA structure which may have been broken by adding one or more
+    // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+    InsertPhiNodes();
+
+    // Clear the loop data structures.
+    early_exit_loop_.clear();
+    taken_test_loop_.clear();
+    finite_loop_.clear();
+  }
+
  private:
   // Return the map of proven value ranges at the beginning of a basic block.
   ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,25 +576,6 @@
     return nullptr;
   }
 
-  // Return the range resulting from induction variable analysis of "instruction" when the value
-  // is used from "context", for example, an index used from a bounds-check inside a loop body.
-  ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
-    InductionVarRange::Value v1;
-    InductionVarRange::Value v2;
-    bool needs_finite_test = false;
-    induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test);
-    if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
-        v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
-      DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
-      DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
-      ValueBound low = ValueBound(v1.instruction, v1.b_constant);
-      ValueBound up = ValueBound(v2.instruction, v2.b_constant);
-      return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
-    }
-    // Didn't find anything useful.
-    return nullptr;
-  }
-
   // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
   // and push the narrowed value range to `successor`.
   void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1330,17 +721,6 @@
 
     bool overflow, underflow;
     if (cond == kCondLT || cond == kCondLE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() < 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondLT);
-        }
-      }
-
       if (!upper.Equals(ValueBound::Max())) {
         int32_t compensation = (cond == kCondLT) ? -1 : 0;  // upper bound is inclusive
         ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1364,17 +744,6 @@
         ApplyRangeFromComparison(left, block, false_successor, new_range);
       }
     } else if (cond == kCondGT || cond == kCondGE) {
-      if (left_monotonic_range != nullptr) {
-        // Update the info for monotonic value range.
-        if (left_monotonic_range->GetInductionVariable() == left &&
-            left_monotonic_range->GetIncrement() > 0 &&
-            block == left_monotonic_range->GetLoopHeader() &&
-            instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
-          left_monotonic_range->SetEnd(right);
-          left_monotonic_range->SetInclusive(cond == kCondGT);
-        }
-      }
-
       // array.length as a lower bound isn't considered useful.
       if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
         int32_t compensation = (cond == kCondGT) ? 1 : 0;  // lower bound is inclusive
@@ -1400,38 +769,34 @@
     }
   }
 
-  void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+  void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
     HBasicBlock* block = bounds_check->GetBlock();
     HInstruction* index = bounds_check->InputAt(0);
     HInstruction* array_length = bounds_check->InputAt(1);
     DCHECK(array_length->IsIntConstant() ||
            array_length->IsArrayLength() ||
            array_length->IsPhi());
-
-    if (array_length->IsPhi()) {
-      // Input 1 of the phi contains the real array.length once the loop body is
-      // entered. That value will be used for bound analysis. The graph is still
-      // strictly in SSA form.
-      array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
-    }
+    bool try_dynamic_bce = true;
 
     if (!index->IsIntConstant()) {
+      // Non-constant subscript.
       ValueBound lower = ValueBound(nullptr, 0);        // constant 0
       ValueBound upper = ValueBound(array_length, -1);  // array_length - 1
       ValueRange array_range(GetGraph()->GetArena(), lower, upper);
-      // Try range obtained by local analysis.
+      // Try range obtained by dominator-based analysis.
       ValueRange* index_range = LookupValueRange(index, block);
       if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+        ReplaceInstruction(bounds_check, index);
         return;
       }
       // Try range obtained by induction variable analysis.
-      index_range = LookupInductionRange(bounds_check, index);
-      if (index_range != nullptr && index_range->FitsIn(&array_range)) {
-        ReplaceBoundsCheck(bounds_check, index);
+      // Disables dynamic bce if OOB is certain.
+      if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+        ReplaceInstruction(bounds_check, index);
         return;
       }
     } else {
+      // Constant subscript.
       int32_t constant = index->AsIntConstant()->GetValue();
       if (constant < 0) {
         // Will always throw exception.
@@ -1439,7 +804,7 @@
       }
       if (array_length->IsIntConstant()) {
         if (constant < array_length->AsIntConstant()->GetValue()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
         }
         return;
       }
@@ -1450,7 +815,7 @@
         ValueBound lower = existing_range->GetLower();
         DCHECK(lower.IsConstant());
         if (constant < lower.GetConstant()) {
-          ReplaceBoundsCheck(bounds_check, index);
+          ReplaceInstruction(bounds_check, index);
           return;
         } else {
           // Existing range isn't strong enough to eliminate the bounds check.
@@ -1485,11 +850,11 @@
           ValueRange(GetGraph()->GetArena(), lower, upper);
       GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
     }
-  }
 
-  void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
-    bounds_check->ReplaceWith(index);
-    bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+    // If static analysis fails, and OOB is not certain, try dynamic elimination.
+    if (try_dynamic_bce) {
+      TryDynamicBCE(bounds_check);
+    }
   }
 
   static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1508,7 +873,7 @@
     return true;
   }
 
-  void VisitPhi(HPhi* phi) {
+  void VisitPhi(HPhi* phi) OVERRIDE {
     if (phi->IsLoopHeaderPhi()
         && (phi->GetType() == Primitive::kPrimInt)
         && HasSameInputAtBackEdges(phi)) {
@@ -1555,7 +920,7 @@
     }
   }
 
-  void VisitIf(HIf* instruction) {
+  void VisitIf(HIf* instruction) OVERRIDE {
     if (instruction->InputAt(0)->IsCondition()) {
       HCondition* cond = instruction->InputAt(0)->AsCondition();
       IfCondition cmp = cond->GetCondition();
@@ -1564,42 +929,11 @@
         HInstruction* left = cond->GetLeft();
         HInstruction* right = cond->GetRight();
         HandleIf(instruction, left, right, cmp);
-
-        HBasicBlock* block = instruction->GetBlock();
-        ValueRange* left_range = LookupValueRange(left, block);
-        if (left_range == nullptr) {
-          return;
-        }
-
-        if (left_range->IsMonotonicValueRange() &&
-            block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
-          // The comparison is for an induction variable in the loop header.
-          DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
-          HBasicBlock* loop_body_successor =
-            left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
-          if (loop_body_successor == nullptr) {
-            // In case it's some strange loop structure.
-            return;
-          }
-          ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
-          if ((new_left_range == left_range) ||
-              // Range narrowed with deoptimization is usually more useful than
-              // a constant range.
-              new_left_range->IsConstantValueRange()) {
-            // We are not successful in narrowing the monotonic value range to
-            // a regular value range. Try using deoptimization.
-            new_left_range = left_range->AsMonotonicValueRange()->
-                NarrowWithDeoptimization();
-            if (new_left_range != left_range) {
-              GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
-            }
-          }
-        }
       }
     }
   }
 
-  void VisitAdd(HAdd* add) {
+  void VisitAdd(HAdd* add) OVERRIDE {
     HInstruction* right = add->GetRight();
     if (right->IsIntConstant()) {
       ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1613,7 +947,7 @@
     }
   }
 
-  void VisitSub(HSub* sub) {
+  void VisitSub(HSub* sub) OVERRIDE {
     HInstruction* left = sub->GetLeft();
     HInstruction* right = sub->GetRight();
     if (right->IsIntConstant()) {
@@ -1715,19 +1049,19 @@
     }
   }
 
-  void VisitDiv(HDiv* div) {
+  void VisitDiv(HDiv* div) OVERRIDE {
     FindAndHandlePartialArrayLength(div);
   }
 
-  void VisitShr(HShr* shr) {
+  void VisitShr(HShr* shr) OVERRIDE {
     FindAndHandlePartialArrayLength(shr);
   }
 
-  void VisitUShr(HUShr* ushr) {
+  void VisitUShr(HUShr* ushr) OVERRIDE {
     FindAndHandlePartialArrayLength(ushr);
   }
 
-  void VisitAnd(HAnd* instruction) {
+  void VisitAnd(HAnd* instruction) OVERRIDE {
     if (instruction->GetRight()->IsIntConstant()) {
       int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
       if (constant > 0) {
@@ -1742,7 +1076,7 @@
     }
   }
 
-  void VisitNewArray(HNewArray* new_array) {
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
     HInstruction* len = new_array->InputAt(0);
     if (!len->IsIntConstant()) {
       HInstruction *left;
@@ -1766,9 +1100,12 @@
     }
   }
 
-  void VisitDeoptimize(HDeoptimize* deoptimize) {
-    // Right now it's only HLessThanOrEqual.
-    DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+      return;
+    }
+    // If this instruction was added by AddCompareWithDeoptimization(), narrow
+    // the range accordingly in subsequent basic blocks.
     HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
     HInstruction* instruction = less_than_or_equal->InputAt(0);
     if (instruction->IsArrayLength()) {
@@ -1782,6 +1119,35 @@
     }
   }
 
+  /**
+    * After null/bounds checks are eliminated, some invariant array references
+    * may be exposed underneath which can be hoisted out of the loop to the
+    * preheader or, in combination with dynamic bce, the deoptimization block.
+    *
+    * for (int i = 0; i < n; i++) {
+    *                                <-------+
+    *   for (int j = 0; j < n; j++)          |
+    *     a[i][j] = 0;               --a[i]--+
+    * }
+    *
+    * Note: this optimization is no longer applied after deoptimization on array references
+    * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
+    * those cases it would be unsafe to hoist array references across their deoptimization
+    * instruction inside a loop.
+    */
+  void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+    if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+      HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+      if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) &&
+          loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) {
+        SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+        if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+          HoistToPreheaderOrDeoptBlock(loop, array_get);
+        }
+      }
+    }
+  }
+
   void AddCompareWithDeoptimization(HInstruction* array_length,
                                     HIntConstant* const_instr,
                                     HBasicBlock* block) {
@@ -1803,6 +1169,9 @@
     block->InsertInstructionBefore(cond, bounds_check);
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+    // Flag that this kind of deoptimization on array references with constant
+    // subscripts has occurred to prevent further hoisting of these references.
+    has_deoptimization_on_constant_subscripts_ = true;
   }
 
   void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1846,21 +1215,432 @@
     }
   }
 
+  /**
+   * Returns true if static range analysis based on induction variables can determine the bounds
+   * check on the given array range is always satisfied with the computed index range. The output
+   * parameter try_dynamic_bce is set to false if OOB is certain.
+   */
+  bool InductionRangeFitsIn(ValueRange* array_range,
+                            HInstruction* context,
+                            HInstruction* index,
+                            bool* try_dynamic_bce) {
+    InductionVarRange::Value v1;
+    InductionVarRange::Value v2;
+    bool needs_finite_test = false;
+    induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+    do {
+      if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+          v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+        DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+        DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+        ValueRange index_range(GetGraph()->GetArena(),
+                               ValueBound(v1.instruction, v1.b_constant),
+                               ValueBound(v2.instruction, v2.b_constant));
+        // If analysis reveals a certain OOB, disable dynamic BCE.
+        if (index_range.GetLower().LessThan(array_range->GetLower()) ||
+            index_range.GetUpper().GreaterThan(array_range->GetUpper())) {
+          *try_dynamic_bce = false;
+          return false;
+        }
+        // Use analysis for static bce only if loop is finite.
+        if (!needs_finite_test && index_range.FitsIn(array_range)) {
+          return true;
+        }
+      }
+    } while (induction_range_.RefineOuter(&v1, &v2));
+    return false;
+  }
+
+  /**
+   * When the compiler fails to remove a bounds check statically, we try to remove the bounds
+   * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
+   * will go out of range (we want to be rather certain of that given the slowdown of
+   * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
+   * bounds checks and related null checks removed.
+   */
+  void TryDynamicBCE(HBoundsCheck* instruction) {
+    HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+    HInstruction* index = instruction->InputAt(0);
+    HInstruction* length = instruction->InputAt(1);
+    // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+    bool needs_finite_test = false;
+    bool needs_taken_test = false;
+    if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+        induction_range_.CanGenerateCode(
+            instruction, index, &needs_finite_test, &needs_taken_test) &&
+        CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+        CanHandleLength(loop, length, needs_taken_test)) {  // do this test last (may code gen)
+      HInstruction* lower = nullptr;
+      HInstruction* upper = nullptr;
+      // Generate the following unsigned comparisons
+      //     if (lower > upper)   deoptimize;
+      //     if (upper >= length) deoptimize;
+      // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+      //     if (upper >= length) deoptimize;
+      // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
+      // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
+      // correctly guard against any possible OOB (including arithmetic wrap-around cases).
+      HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+      induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+      if (lower != nullptr) {
+        InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+      }
+      InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+      ReplaceInstruction(instruction, index);
+    }
+  }
+
+  /**
+   * Returns true if heuristics indicate that dynamic bce may be profitable.
+   */
+  bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+    if (loop != nullptr) {
+      // A try boundary preheader is hard to handle.
+      // TODO: remove this restriction
+      if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+        return false;
+      }
+      // Does loop have early-exits? If so, the full range may not be covered by the loop
+      // at runtime and testing the range may apply deoptimization unnecessarily.
+      if (IsEarlyExitLoop(loop)) {
+        return false;
+      }
+      // Does the current basic block dominate all back edges? If not,
+      // don't apply dynamic bce to something that may not be executed.
+      for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+        if (!block->Dominates(back_edge)) {
+          return false;
+        }
+      }
+      // Success!
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the loop has early exits, which implies it may not cover
+   * the full range computed by range analysis based on induction variables.
+   */
+  bool IsEarlyExitLoop(HLoopInformation* loop) {
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    // If loop has been analyzed earlier for early-exit, don't repeat the analysis.
+    auto it = early_exit_loop_.find(loop_id);
+    if (it != early_exit_loop_.end()) {
+      return it->second;
+    }
+    // First time early-exit analysis for this loop. Since analysis requires scanning
+    // the full loop-body, results of the analysis is stored for subsequent queries.
+    HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+    for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+      for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+        if (!loop->Contains(*successor)) {
+          early_exit_loop_.Put(loop_id, true);
+          return true;
+        }
+      }
+    }
+    early_exit_loop_.Put(loop_id, false);
+    return false;
+  }
+
+  /**
+   * Returns true if the array length is already loop invariant, or can be made so
+   * by handling the null check under the hood of the array length operation.
+   */
+  bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(length)) {
+      return true;
+    } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+      if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+        HoistToPreheaderOrDeoptBlock(loop, length);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if the null check is already loop invariant, or can be made so
+   * by generating a deoptimization test.
+   */
+  bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+    if (loop->IsDefinedOutOfTheLoop(check)) {
+      return true;
+    } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+      HInstruction* array = check->InputAt(0);
+      if (loop->IsDefinedOutOfTheLoop(array)) {
+        // Generate: if (array == null) deoptimize;
+        HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+        HInstruction* cond =
+            new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+        InsertDeopt(loop, block, cond);
+        ReplaceInstruction(check, array);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Returns true if compiler can apply dynamic bce to loops that may be infinite
+   * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+   * the range analysis evaluation code by "overshooting" the computed range.
+   * Since deoptimization would be a bad choice, and there is no other version
+   * of the loop to use, dynamic bce in such cases is only allowed if other tests
+   * ensure the loop is finite.
+   */
+  bool CanHandleInfiniteLoop(
+      HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+    if (needs_infinite_test) {
+      // If we already forced the loop to be finite, allow directly.
+      const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+      if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+        return true;
+      }
+      // Otherwise, allow dynamic bce if the index (which is necessarily an induction at
+      // this point) is the direct loop index (viz. a[i]), since then the runtime tests
+      // ensure upper bound cannot cause an infinite loop.
+      HInstruction* control = loop->GetHeader()->GetLastInstruction();
+      if (control->IsIf()) {
+        HInstruction* if_expr = control->AsIf()->InputAt(0);
+        if (if_expr->IsCondition()) {
+          HCondition* condition = if_expr->AsCondition();
+          if (index == condition->InputAt(0) ||
+              index == condition->InputAt(1)) {
+            finite_loop_.insert(loop_id);
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    return true;
+  }
+
+  /** Inserts a deoptimization test. */
+  void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+    HInstruction* suspend = loop->GetSuspendCheck();
+    block->InsertInstructionBefore(condition, block->GetLastInstruction());
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+    if (suspend->HasEnvironment()) {
+      deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+          suspend->GetEnvironment(), loop->GetHeader());
+    }
+  }
+
+  /** Hoists instruction out of the loop to preheader or deoptimization block. */
+  void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+    // Use preheader unless there is an earlier generated deoptimization block since
+    // hoisted expressions may depend on and/or used by the deoptimization tests.
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    HBasicBlock* preheader = loop->GetPreHeader();
+    HBasicBlock* block = preheader;
+    auto it = taken_test_loop_.find(loop_id);
+    if (it != taken_test_loop_.end()) {
+      block = it->second;
+    }
+    // Hoist the instruction.
+    DCHECK(!instruction->HasEnvironment());
+    instruction->MoveBefore(block->GetLastInstruction());
+  }
+
+  /**
+   * Adds a new taken-test structure to a loop if needed (and not already done).
+   * The taken-test protects range analysis evaluation code to avoid any
+   * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+   *
+   * Returns block in which deoptimizations/invariants can be put.
+   *
+   *          old_preheader
+   *               |
+   *            if_block          <- taken-test protects deoptimization block
+   *            /      \
+   *     true_block  false_block  <- deoptimizations/invariants are placed in true_block
+   *            \       /
+   *          new_preheader       <- may require phi nodes to preserve SSA structure
+   *                |
+   *             header
+   *
+   * For example, this loop:
+   *
+   *   for (int i = lower; i < upper; i++) {
+   *     array[i] = 0;
+   *   }
+   *
+   * will be transformed to:
+   *
+   *   if (lower < upper) {
+   *     if (array == null) deoptimize;
+   *     array_length = array.length;
+   *     if (lower > upper)         deoptimize;  // unsigned
+   *     if (upper >= array_length) deoptimize;  // unsigned
+   *   } else {
+   *     array_length = 0;
+   *   }
+   *   for (int i = lower; i < upper; i++) {
+   *     // Loop without null check and bounds check, and any array.length replaced with array_length.
+   *     array[i] = 0;
+   *   }
+   */
+  HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+    // Not needed (can use preheader), or already done (can reuse)?
+    const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+    if (!needs_taken_test) {
+      return loop->GetPreHeader();
+    } else {
+      auto it = taken_test_loop_.find(loop_id);
+      if (it != taken_test_loop_.end()) {
+        return it->second;
+      }
+    }
+
+    // Generate top test structure.
+    HBasicBlock* header = loop->GetHeader();
+    GetGraph()->TransformLoopHeaderForBCE(header);
+    HBasicBlock* new_preheader = loop->GetPreHeader();
+    HBasicBlock* if_block = new_preheader->GetDominator();
+    HBasicBlock* true_block = if_block->GetSuccessors()[0];  // True successor.
+    HBasicBlock* false_block = if_block->GetSuccessors()[1];  // False successor.
+
+    // Goto instructions.
+    true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+    new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+    // Insert the taken-test to see if the loop body is entered. If the
+    // loop isn't entered at all, it jumps around the deoptimization block.
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());  // placeholder
+    HInstruction* condition = nullptr;
+    induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+                                       GetGraph(),
+                                       if_block,
+                                       &condition);
+    DCHECK(condition != nullptr);
+    if_block->RemoveInstruction(if_block->GetLastInstruction());
+    if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+    taken_test_loop_.Put(loop_id, true_block);
+    return true_block;
+  }
+
+  /**
+   * Inserts phi nodes that preserve SSA structure in generated top test structures.
+   * All uses of instructions in the deoptimization block that reach the loop need
+   * a phi node in the new loop preheader to fix the dominance relation.
+   *
+   * Example:
+   *           if_block
+   *            /      \
+   *         x_0 = ..  false_block
+   *            \       /
+   *           x_1 = phi(x_0, null)   <- synthetic phi
+   *               |
+   *             header
+   */
+  void InsertPhiNodes() {
+    // Scan all new deoptimization blocks.
+    for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+      HBasicBlock* true_block = it1->second;
+      HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+      // Scan all instructions in a new deoptimization block.
+      for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        Primitive::Type type = instruction->GetType();
+        HPhi* phi = nullptr;
+        // Scan all uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+             !it2.Done();
+             it2.Advance()) {
+          HInstruction* user = it2.Current()->GetUser();
+          if (user->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->ReplaceInput(phi, it2.Current()->GetIndex());
+          }
+        }
+        // Scan all environment uses of an instruction and replace each later use with a phi node.
+        for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+             !it2.Done();
+             it2.Advance()) {
+          HEnvironment* user = it2.Current()->GetUser();
+          if (user->GetHolder()->GetBlock() != true_block) {
+            if (phi == nullptr) {
+              phi = NewPhi(new_preheader, instruction, type);
+            }
+            user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+            user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+            phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+   * These are synthetic phi nodes without a virtual register.
+   */
+  HPhi* NewPhi(HBasicBlock* new_preheader,
+               HInstruction* instruction,
+               Primitive::Type type) {
+    HGraph* graph = GetGraph();
+    HInstruction* zero;
+    switch (type) {
+      case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+      case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+      case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+      default: zero = graph->GetConstant(type, 0); break;
+    }
+    HPhi* phi = new (graph->GetArena())
+        HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+    phi->SetRawInputAt(0, instruction);
+    phi->SetRawInputAt(1, zero);
+    new_preheader->AddPhi(phi);
+    return phi;
+  }
+
+  /** Helper method to replace an instruction with another instruction. */
+  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    instruction->ReplaceWith(replacement);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+
+  // A set of maps, one per basic block, from instruction to range.
   ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
 
   // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
   // a block that checks a constant index against that HArrayLength.
   ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
 
+  // Early-exit loop bookkeeping.
+  ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+  // Taken-test loop bookkeeping.
+  ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+  // Finite loop bookkeeping.
+  ArenaSet<uint32_t> finite_loop_;
+
   // For the block, there is at least one HArrayLength instruction for which there
   // is more than one bounds check instruction with constant indexing. And it's
   // beneficial to add a compare instruction that has deoptimization fallback and
   // eliminate those bounds checks.
   bool need_to_revisit_block_;
 
+  // Flag that denotes whether deoptimization has occurred on array references
+  // with constant subscripts (see AddCompareWithDeoptimization()).
+  bool has_deoptimization_on_constant_subscripts_;
+
   // Initial number of blocks.
   uint32_t initial_block_size_;
 
+  // Side effects.
+  const SideEffectsAnalysis& side_effects_;
+
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
@@ -1872,14 +1652,12 @@
     return;
   }
 
-  BCEVisitor visitor(graph_, induction_analysis_);
   // Reverse post order guarantees a node's dominators are visited first.
   // We want to visit in the dominator-based order since if a value is known to
   // be bounded by a range at one instruction, it must be true that all uses of
   // that value dominated by that instruction fits in that range. Range of that
   // value can be narrowed further down in the dominator tree.
-  //
-  // TODO: only visit blocks that dominate some array accesses.
+  BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
   HBasicBlock* last_visited_block = nullptr;
   for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* current = it.Current();
@@ -1896,6 +1674,9 @@
     visitor.VisitBasicBlock(current);
     last_visited_block = current;
   }
+
+  // Perform cleanup.
+  visitor.Finish();
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca..b9df686 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
 
 namespace art {
 
+class SideEffectsAnalysis;
 class HInductionVarAnalysis;
 
 class BoundsCheckElimination : public HOptimization {
  public:
-  BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+  BoundsCheckElimination(HGraph* graph,
+                         const SideEffectsAnalysis& side_effects,
+                         HInductionVarAnalysis* induction_analysis)
       : HOptimization(graph, kBoundsCheckEliminiationPassName),
+        side_effects_(side_effects),
         induction_analysis_(induction_analysis) {}
 
   void Run() OVERRIDE;
@@ -34,6 +38,7 @@
   static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
 
  private:
+  const SideEffectsAnalysis& side_effects_;
   HInductionVarAnalysis* induction_analysis_;
 
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2..dbeb1cc 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@
     HInductionVarAnalysis induction(graph_);
     induction.Run();
 
-    BoundsCheckElimination(graph_, &induction).Run();
+    BoundsCheckElimination(graph_, side_effects, &induction).Run();
   }
 
   ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index d7754e8..2bbf500 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -735,6 +735,79 @@
   }
 }
 
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
+
+  ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+  Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+  ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+      *dex_compilation_unit_->GetDexFile(),
+      method_idx,
+      dex_compilation_unit_->GetDexCache(),
+      class_loader,
+      /* referrer */ nullptr,
+      invoke_type);
+
+  if (UNLIKELY(resolved_method == nullptr)) {
+    // Clean up any exception left by type resolution.
+    soa.Self()->ClearException();
+    return nullptr;
+  }
+
+  // Check access. The class linker has a fast path for looking into the dex cache
+  // and does not check the access if it hits it.
+  if (compiling_class.Get() == nullptr) {
+    if (!resolved_method->IsPublic()) {
+      return nullptr;
+    }
+  } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+                                                       resolved_method,
+                                                       dex_compilation_unit_->GetDexCache().Get(),
+                                                       method_idx)) {
+    return nullptr;
+  }
+
+  // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+  // We need to look at the referrer's super class vtable.
+  if (invoke_type == kSuper) {
+    if (compiling_class.Get() == nullptr) {
+      // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead of time compilation),
+      // bail out.
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      return nullptr;
+    }
+    uint16_t vtable_index = resolved_method->GetMethodIndex();
+    ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+        vtable_index, class_linker->GetImagePointerSize());
+    if (actual_method != resolved_method &&
+        !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+      // TODO: The actual method could still be referenced in the current dex file, so we
+      // could try locating it.
+      // TODO: Remove the dex_file restriction.
+      return nullptr;
+    }
+    if (!actual_method->IsInvokable()) {
+      // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+      // could resolve the callee to the wrong method.
+      return nullptr;
+    }
+    resolved_method = actual_method;
+  }
+
+  // Check for incompatible class changes. The class linker has a fast path for
+  // looking into the dex cache and does not check incompatible class changes if it hits it.
+  if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+    return nullptr;
+  }
+
+  return resolved_method;
+}
+
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
                                 uint32_t dex_pc,
                                 uint32_t method_idx,
@@ -742,22 +815,18 @@
                                 bool is_range,
                                 uint32_t* args,
                                 uint32_t register_index) {
-  InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
-  InvokeType optimized_invoke_type = original_invoke_type;
+  InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
   const char* descriptor = dex_file_->GetMethodShorty(method_idx);
   Primitive::Type return_type = Primitive::GetType(descriptor[0]);
 
   // Remove the return type from the 'proto'.
   size_t number_of_arguments = strlen(descriptor) - 1;
-  if (original_invoke_type != kStatic) {  // instance call
+  if (invoke_type != kStatic) {  // instance call
     // One extra argument for 'this'.
     number_of_arguments++;
   }
 
   MethodReference target_method(dex_file_, method_idx);
-  int32_t table_index = 0;
-  uintptr_t direct_code = 0;
-  uintptr_t direct_method = 0;
 
   // Special handling for string init.
   int32_t string_init_offset = 0;
@@ -780,7 +849,7 @@
         method_idx,
         target_method,
         dispatch_info,
-        original_invoke_type,
+        invoke_type,
         kStatic /* optimized_invoke_type */,
         HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
     return HandleStringInit(invoke,
@@ -791,23 +860,16 @@
                             descriptor);
   }
 
-  // Handle unresolved methods.
-  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
-                                           dex_pc,
-                                           true /* update_stats */,
-                                           true /* enable_devirtualization */,
-                                           &optimized_invoke_type,
-                                           &target_method,
-                                           &table_index,
-                                           &direct_code,
-                                           &direct_method)) {
+  ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+  if (resolved_method == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
     HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
                                                      number_of_arguments,
                                                      return_type,
                                                      dex_pc,
                                                      method_idx,
-                                                     original_invoke_type);
+                                                     invoke_type);
     return HandleInvoke(invoke,
                         number_of_vreg_arguments,
                         args,
@@ -817,21 +879,26 @@
                         nullptr /* clinit_check */);
   }
 
-  // Handle resolved methods (non string init).
-
-  DCHECK(optimized_invoke_type != kSuper);
-
   // Potential class initialization check, in the case of a static method call.
   HClinitCheck* clinit_check = nullptr;
   HInvoke* invoke = nullptr;
 
-  if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+  if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
     // By default, consider that the called method implicitly requires
     // an initialization check of its declaring method.
     HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
         = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
-    if (optimized_invoke_type == kStatic) {
-      clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+    ScopedObjectAccess soa(Thread::Current());
+    if (invoke_type == kStatic) {
+      clinit_check = ProcessClinitCheckForInvoke(
+          dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+    } else if (invoke_type == kSuper) {
+      if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+        // Update the target method to the one resolved. Note that this may be a no-op if
+        // we resolved to the method referenced by the instruction.
+        method_idx = resolved_method->GetDexMethodIndex();
+        target_method = MethodReference(dex_file_, method_idx);
+      }
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -847,24 +914,26 @@
                                                 method_idx,
                                                 target_method,
                                                 dispatch_info,
-                                                original_invoke_type,
-                                                optimized_invoke_type,
+                                                invoke_type,
+                                                invoke_type,
                                                 clinit_check_requirement);
-  } else if (optimized_invoke_type == kVirtual) {
+  } else if (invoke_type == kVirtual) {
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeVirtual(arena_,
                                          number_of_arguments,
                                          return_type,
                                          dex_pc,
                                          method_idx,
-                                         table_index);
+                                         resolved_method->GetMethodIndex());
   } else {
-    DCHECK_EQ(optimized_invoke_type, kInterface);
+    DCHECK_EQ(invoke_type, kInterface);
+    ScopedObjectAccess soa(Thread::Current());  // Needed for the method index
     invoke = new (arena_) HInvokeInterface(arena_,
                                            number_of_arguments,
                                            return_type,
                                            dex_pc,
                                            method_idx,
-                                           table_index);
+                                           resolved_method->GetDexMethodIndex());
   }
 
   return HandleInvoke(invoke,
@@ -962,23 +1031,18 @@
 
 HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* resolved_method,
       uint32_t method_idx,
       HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
+  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+  Thread* self = Thread::Current();
+  StackHandleScope<4> hs(self);
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(
-          soa.Self(), *dex_compilation_unit_->GetDexFile())));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
-      soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
-
-  DCHECK(resolved_method != nullptr);
-
-  const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+          self, *dex_compilation_unit_->GetDexFile())));
   Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
-      outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+      outer_compilation_unit_->GetClassLinker()->FindDexCache(
+          self, outer_dex_file)));
   Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
   Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 5ada93f..ca71c32 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,9 @@
 
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
+  // The number of entries in a packed switch before we use a jump table or specified
+  // compare/jump series.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
@@ -305,8 +306,10 @@
 
   HClinitCheck* ProcessClinitCheckForInvoke(
       uint32_t dex_pc,
+      ArtMethod* method,
       uint32_t method_idx,
-      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+      HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+      SHARED_REQUIRES(Locks::mutator_lock_);
 
   // Build a HNewInstance instruction.
   bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
@@ -315,6 +318,10 @@
   bool IsInitialized(Handle<mirror::Class> cls) const
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Try to resolve a method using the class linker. Return null if a method could
+  // not be resolved.
+  ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+
   ArenaAllocator* const arena_;
 
   // A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index a98d9c6..0a26786 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -724,7 +724,9 @@
                       graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+                                    graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
 }
@@ -1922,10 +1924,18 @@
                                          codegen_->GetAssembler(),
                                          codegen_->GetInstructionSetFeatures());
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
+
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -5818,16 +5828,6 @@
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   if (desired_dispatch_info.code_ptr_location ==
       HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5850,6 +5850,32 @@
   return desired_dispatch_info;
 }
 
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather that trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   // For better instruction scheduling we load the direct code pointer before the method pointer.
   switch (invoke->GetCodePtrLocation()) {
@@ -5881,11 +5907,15 @@
       __ LoadLiteral(temp.AsRegister<Register>(),
                      DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      HArmDexCacheArraysBase* base =
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register method_reg;
@@ -5943,12 +5973,16 @@
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+  __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -5970,7 +6004,11 @@
 
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+  size_t size =
+      method_patches_.size() +
+      call_patches_.size() +
+      relative_call_patches_.size() +
+      /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -5996,6 +6034,28 @@
                                                              info.target_method.dex_file,
                                                              info.target_method.dex_method_index));
   }
+  for (const auto& pair : dex_cache_arrays_base_labels_) {
+    HArmDexCacheArraysBase* base = pair.first;
+    const DexCacheArraysBaseLabels* labels = &pair.second;
+    const DexFile& dex_file = base->GetDexFile();
+    size_t base_element_offset = base->GetElementOffset();
+    DCHECK(labels->add_pc_label.IsBound());
+    uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+    // Add MOVW patch.
+    DCHECK(labels->movw_label.IsBound());
+    uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+    // Add MOVT patch.
+    DCHECK(labels->movt_label.IsBound());
+    uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+                                                              &dex_file,
+                                                              add_pc_offset,
+                                                              base_element_offset));
+  }
 }
 
 Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -6046,7 +6106,7 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6062,12 +6122,30 @@
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
+    // the immediate, because IP is used as the destination register. For the other
+    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+    // and they can be encoded in the instruction without making use of IP register.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      GenerateCompareWithImmediate(value_reg, lower_bound + i);
-      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      GenerateCompareWithImmediate(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
     // And the default for any other value.
@@ -6107,6 +6185,23 @@
   }
 }
 
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+  locations->SetOut(Location::RequiresRegister());
+  codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+  CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+  __ BindTrackedLabel(&labels->movw_label);
+  __ movw(base_reg, 0u);
+  __ BindTrackedLabel(&labels->movt_label);
+  __ movt(base_reg, 0u);
+  __ BindTrackedLabel(&labels->add_pc_label);
+  __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 89de4f8..193add2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -373,6 +373,31 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+  // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+  // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+  // currently emit these 3 instructions together, instruction scheduling could
+  // split this sequence apart, so we keep separate labels for each of them.
+  struct DexCacheArraysBaseLabels {
+    DexCacheArraysBaseLabels() = default;
+    DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+    Label movw_label;
+    Label movt_label;
+    Label add_pc_label;
+  };
+
+  void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+    DexCacheArraysBaseLabels labels;
+    dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+  }
+
+  DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+    auto it = dex_cache_arrays_base_labels_.find(base);
+    DCHECK(it != dex_cache_arrays_base_labels_.end());
+    return &it->second;
+  }
+
   // Generate a read barrier for a heap reference within `instruction`.
   //
   // A read barrier for an object reference read from the heap is
@@ -419,7 +444,12 @@
   void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
 
  private:
+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+  using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+                                                     DexCacheArraysBaseLabels,
+                                                     std::less<HArmDexCacheArraysBase*>>;
 
   Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
   Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -441,6 +471,8 @@
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
 
+  DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 97f9995..227f4be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -42,6 +42,9 @@
 
 namespace art {
 
+template<class MirrorType>
+class GcRoot;
+
 namespace arm64 {
 
 using helpers::CPURegisterFrom;
@@ -68,10 +71,10 @@
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -431,15 +434,6 @@
 
     __ Bind(GetEntryLabel());
 
-    if (instruction_->IsCheckCast()) {
-      // The codegen for the instruction overwrites `temp`, so put it back in place.
-      Register obj = InputRegisterAt(instruction_, 0);
-      Register temp = WRegisterFrom(locations->GetTemp(0));
-      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-      __ Ldr(temp, HeapOperand(obj, class_offset));
-      arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
-    }
-
     if (!is_fatal_) {
       SaveLiveRegisters(codegen, locations);
     }
@@ -552,7 +546,7 @@
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -572,6 +566,271 @@
   }
 }
 
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
+                                           Location out,
+                                           Location ref,
+                                           Location obj,
+                                           uint32_t offset,
+                                           Location index)
+      : instruction_(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ Ldr(out, HeapOperand(out, class_offset);
+    //   codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(!instruction_->IsInvoke() ||
+           (instruction_->IsInvokeStaticOrDirect() &&
+            instruction_->GetLocations()->Intrinsified()));
+
+    __ Bind(GetEntryLabel());
+
+    // Note: In the case of a HArrayGet instruction, when the base
+    // address is a HArm64IntermediateAddress instruction, it does not
+    // point to the array object itself, but to an offset within this
+    // object. However, the read barrier entry point needs the array
+    // object address to be passed as first argument. So we
+    // temporarily set back `obj_` to that address, and restore its
+    // initial value later.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Sub(obj_reg, obj_reg, offset_);
+    }
+
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
+        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to vixl::MacroAssembler::Lsl and
+          // vixl::MacroAssembler::Mov below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Mov(free_reg.W(), index_reg);
+          index_reg = free_reg;
+          index = LocationFrom(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Add(index_reg, index_reg, Operand(offset_));
+      } else {
+        DCHECK(instruction_->IsInvoke());
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair, the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          LocationFrom(calling_convention.GetRegisterAt(0)),
+                          type,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          LocationFrom(calling_convention.GetRegisterAt(1)),
+                          type,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            LocationFrom(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
+    }
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+
+    // Restore the value of `obj_` when it corresponds to a
+    // HArm64IntermediateAddress instruction.
+    if (instruction_->IsArrayGet() &&
+        instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+      if (kIsDebugBuild) {
+        HArm64IntermediateAddress* intermediate_address =
+            instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+        uint32_t intermediate_address_offset =
+            intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+        DCHECK_EQ(intermediate_address_offset, offset_);
+        DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+      }
+      Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+      __ Add(obj_reg, obj_reg, offset_);
+    }
+
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
+    size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+        return Register(VIXLRegCodeFromART(i), kXRegSize);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on ARM64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free register";
+    UNREACHABLE();
+  }
+
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
+      : instruction_(instruction), out_(out), root_(root) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    // The argument of the ReadBarrierForRootSlow is not a managed
+    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
+    // thus we need a 64-bit move here, and we cannot use
+    //
+    //   arm64_codegen->MoveLocation(
+    //       LocationFrom(calling_convention.GetRegisterAt(0)),
+    //       root_,
+    //       type);
+    //
+    // which would emit a 32-bit move, as `type` is a (32-bit wide)
+    // reference type (`Primitive::kPrimNot`).
+    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+
+ private:
+  HInstruction* const instruction_;
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -1402,13 +1661,25 @@
 }
 
 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps for an object field get when read barriers
+    // are enabled: we do not want the load to overwrite the object's
+    // location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
@@ -1437,7 +1708,11 @@
   }
 
   if (field_type == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+    LocationSummary* locations = instruction->GetLocations();
+    Location base = locations->InAt(0);
+    Location out = locations->Out();
+    uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+    codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
   }
 }
 
@@ -1614,6 +1889,82 @@
   HandleBinaryOp(instruction);
 }
 
+void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+         instruction->GetType() == Primitive::kPrimLong);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  if (instruction->GetInstrKind() == HInstruction::kNeg) {
+    locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
+    HArm64DataProcWithShifterOp* instruction) {
+  Primitive::Type type = instruction->GetType();
+  HInstruction::InstructionKind kind = instruction->GetInstrKind();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+  Register out = OutputRegister(instruction);
+  Register left;
+  if (kind != HInstruction::kNeg) {
+    left = InputRegisterAt(instruction, 0);
+  }
+  // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the
+  // shifter operand operation, the IR generating `right_reg` (input to the type
+  // conversion) can have a different type from the current instruction's type,
+  // so we manually indicate the type.
+  Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
+  int64_t shift_amount = (type == Primitive::kPrimInt)
+    ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue)
+    : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue);
+
+  Operand right_operand(0);
+
+  HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+    right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
+  } else {
+    right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+  }
+
+  // Logical binary operations do not support extension operations in the
+  // operand. Note that VIXL would still manage if it was passed by generating
+  // the extension as a separate instruction.
+  // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
+  DCHECK(!right_operand.IsExtendedRegister() ||
+         (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
+          kind != HInstruction::kNeg));
+  switch (kind) {
+    case HInstruction::kAdd:
+      __ Add(out, left, right_operand);
+      break;
+    case HInstruction::kAnd:
+      __ And(out, left, right_operand);
+      break;
+    case HInstruction::kNeg:
+      DCHECK(instruction->InputAt(0)->AsConstant()->IsZero());
+      __ Neg(out, right_operand);
+      break;
+    case HInstruction::kOr:
+      __ Orr(out, left, right_operand);
+      break;
+    case HInstruction::kSub:
+      __ Sub(out, left, right_operand);
+      break;
+    case HInstruction::kXor:
+      __ Eor(out, left, right_operand);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation kind: " << kind;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1671,22 +2022,33 @@
 }
 
 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Location index = instruction->GetLocations()->InAt(1);
-  size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
+  LocationSummary* locations = instruction->GetLocations();
+  Location index = locations->InAt(1);
+  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   MemOperand source = HeapOperand(obj);
   CPURegister dest = OutputCPURegister(instruction);
 
@@ -1718,8 +2080,22 @@
   codegen_->Load(type, dest, source);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
 
-  if (instruction->GetType() == Primitive::kPrimNot) {
-    GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
+  if (type == Primitive::kPrimNot) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    Location obj_loc = locations->InAt(0);
+    Location out = locations->Out();
+    if (index.IsConstant()) {
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+    } else {
+      // Note: when `obj_loc` is a HArm64IntermediateAddress, it does
+      // not contain the base address of the array object, which is
+      // needed by the read barrier entry point. So the read barrier
+      // slow path will temporarily set back `obj_loc` to the right
+      // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
+      codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+    }
   }
 }
 
@@ -1737,12 +2113,19 @@
 }
 
 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool object_array_set_with_read_barrier =
+      kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+      (may_need_runtime_call_for_type_check  || object_array_set_with_read_barrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+  if (Primitive::IsFloatingPointType(value_type)) {
     locations->SetInAt(2, Location::RequiresFpuRegister());
   } else {
     locations->SetInAt(2, Location::RequiresRegister());
@@ -1752,7 +2135,7 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   LocationSummary* locations = instruction->GetLocations();
-  bool may_need_runtime_call = locations->CanCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
@@ -1766,7 +2149,7 @@
   BlockPoolsScope block_pools(masm);
 
   if (!needs_write_barrier) {
-    DCHECK(!may_need_runtime_call);
+    DCHECK(!may_need_runtime_call_for_type_check);
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
       destination = HeapOperand(array, offset);
@@ -1816,7 +2199,7 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
 
-      if (may_need_runtime_call) {
+      if (may_need_runtime_call_for_type_check) {
         slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
         codegen_->AddSlowPath(slow_path);
         if (instruction->GetValueCanBeNull()) {
@@ -1831,26 +2214,66 @@
           __ Bind(&non_zero);
         }
 
-        Register temp2 = temps.AcquireSameSizeAs(array);
-        __ Ldr(temp, HeapOperand(array, class_offset));
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        GetAssembler()->MaybeUnpoisonHeapReference(temp);
-        __ Ldr(temp, HeapOperand(temp, component_offset));
-        __ Ldr(temp2, HeapOperand(Register(value), class_offset));
-        // No need to poison/unpoison, we're comparing two poisoned references.
-        __ Cmp(temp, temp2);
-        if (instruction->StaticTypeOfArrayIsObjectArray()) {
-          vixl::Label do_put;
-          __ B(eq, &do_put);
-          GetAssembler()->MaybeUnpoisonHeapReference(temp);
-          __ Ldr(temp, HeapOperand(temp, super_offset));
-          // No need to unpoison, we're comparing against null.
-          __ Cbnz(temp, slow_path->GetEntryLabel());
-          __ Bind(&do_put);
+        if (kEmitCompilerReadBarrier) {
+          // When read barriers are enabled, the type checking
+          // instrumentation requires two read barriers:
+          //
+          //   __ Mov(temp2, temp);
+          //   // /* HeapReference<Class> */ temp = temp->component_type_
+          //   __ Ldr(temp, HeapOperand(temp, component_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+          //
+          //   // /* HeapReference<Class> */ temp2 = value->klass_
+          //   __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          //   codegen_->GenerateReadBarrier(
+          //       instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+          //
+          //   __ Cmp(temp, temp2);
+          //
+          // However, the second read barrier may trash `temp`, as it
+          // is a temporary register, and as such would not be saved
+          // along with live registers before calling the runtime (nor
+          // restored afterwards).  So in this case, we bail out and
+          // delegate the work to the array set slow path.
+          //
+          // TODO: Extend the register allocator to support a new
+          // "(locally) live temp" location so as to avoid always
+          // going into the slow path when read barriers are enabled.
+          __ B(slow_path->GetEntryLabel());
         } else {
-          __ B(ne, slow_path->GetEntryLabel());
+          Register temp2 = temps.AcquireSameSizeAs(array);
+          // /* HeapReference<Class> */ temp = array->klass_
+          __ Ldr(temp, HeapOperand(array, class_offset));
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+          // /* HeapReference<Class> */ temp = temp->component_type_
+          __ Ldr(temp, HeapOperand(temp, component_offset));
+          // /* HeapReference<Class> */ temp2 = value->klass_
+          __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+          // If heap poisoning is enabled, no need to unpoison `temp`
+          // nor `temp2`, as we are comparing two poisoned references.
+          __ Cmp(temp, temp2);
+
+          if (instruction->StaticTypeOfArrayIsObjectArray()) {
+            vixl::Label do_put;
+            __ B(eq, &do_put);
+            // If heap poisoning is enabled, the `temp` reference has
+            // not been unpoisoned yet; unpoison it now.
+            GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+            // /* HeapReference<Class> */ temp = temp->super_class_
+            __ Ldr(temp, HeapOperand(temp, super_offset));
+            // If heap poisoning is enabled, no need to unpoison
+            // `temp`, as we are comparing against null below.
+            __ Cbnz(temp, slow_path->GetEntryLabel());
+            __ Bind(&do_put);
+          } else {
+            __ B(ne, slow_path->GetEntryLabel());
+          }
+          temps.Release(temp2);
         }
-        temps.Release(temp2);
       }
 
       if (kPoisonHeapReferences) {
@@ -1866,7 +2289,7 @@
       }
       __ Str(source, destination);
 
-      if (!may_need_runtime_call) {
+      if (!may_need_runtime_call_for_type_check) {
         codegen_->MaybeRecordImplicitNullCheck(instruction);
       }
     }
@@ -2533,40 +2956,44 @@
 
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // The out register is used as a temporary, so it overlaps with the inputs.
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
-    locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM64 uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
@@ -2582,15 +3009,9 @@
     __ Cbz(obj, &zero);
   }
 
-  // In case of an interface/unresolved check, we put the object class into the object register.
-  // This is safe, as the register is caller-save, and the object must be in another
-  // register if it survives the runtime call.
-  Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
-      (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
-      ? obj
-      : out;
-  __ Ldr(target, HeapOperand(obj.W(), class_offset));
-  GetAssembler()->MaybeUnpoisonHeapReference(target);
+  // /* HeapReference<Class> */ out = obj->klass_
+  __ Ldr(out, HeapOperand(obj.W(), class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
 
   switch (instruction->GetTypeCheckKind()) {
     case TypeCheckKind::kExactCheck: {
@@ -2601,13 +3022,23 @@
       }
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl::Label loop, success;
       __ Bind(&loop);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -2618,14 +3049,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->super_class_
       __ Ldr(out, HeapOperand(out, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -2636,14 +3077,24 @@
       }
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
       vixl::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `out` into `temp` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp = WRegisterFrom(temp_loc);
+        __ Mov(temp, out);
+      }
+      // /* HeapReference<Class> */ out = out->component_type_
       __ Ldr(out, HeapOperand(out, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(out);
+      codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -2654,11 +3105,12 @@
       __ B(&done);
       break;
     }
+
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
-      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-          instruction, /* is_fatal */ false);
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ B(ne, slow_path->GetEntryLabel());
       __ Mov(out, 1);
@@ -2667,13 +3119,25 @@
       }
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-    default: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                                      /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
       if (zero.IsLinked()) {
         __ B(&done);
       }
@@ -2699,58 +3163,62 @@
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
 
-  switch (instruction->GetTypeCheckKind()) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = throws_into_catch
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall;
-      break;
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCall;
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
       break;
     case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
       call_kind = LocationSummary::kCallOnSlowPath;
       break;
   }
 
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, call_kind);
-  if (call_kind != LocationSummary::kCall) {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-    // Note that TypeCheckSlowPathARM64 uses this register too.
-    locations->AddTemp(Location::RequiresRegister());
-  } else {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (kEmitCompilerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+     locations->AddTemp(Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
-  Register temp;
-  if (!locations->WillCall()) {
-    temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
-  }
-
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = WRegisterFrom(temp_loc);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  SlowPathCodeARM64* slow_path = nullptr;
 
-  if (!locations->WillCall()) {
-    slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
-        instruction, !locations->CanCall());
-    codegen_->AddSlowPath(slow_path);
-  }
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARM64* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+                                                          is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
 
   vixl::Label done;
   // Avoid null check if we know obj is not null.
@@ -2758,76 +3226,159 @@
     __ Cbz(obj, &done);
   }
 
-  if (locations->WillCall()) {
-    __ Ldr(obj, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(obj);
-  } else {
-    __ Ldr(temp, HeapOperand(obj, class_offset));
-    GetAssembler()->MaybeUnpoisonHeapReference(temp);
-  }
+  // /* HeapReference<Class> */ temp = obj->klass_
+  __ Ldr(temp, HeapOperand(obj, class_offset));
+  codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
 
-  switch (instruction->GetTypeCheckKind()) {
+  switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
-      __ B(ne, slow_path->GetEntryLabel());
+      __ B(ne, type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kAbstractClassCheck: {
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::Label loop;
+      vixl::Label loop, compare_classes;
       __ Bind(&loop);
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      // Jump to the slow path to throw the exception.
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // to the `compare_classes` label to compare it with the checked
+      // class.
+      __ Cbnz(temp, &compare_classes);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&compare_classes);
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
+
     case TypeCheckKind::kClassHierarchyCheck: {
       // Walk over the class hierarchy to find a match.
       vixl::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
       __ B(eq, &done);
+
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->super_class_
       __ Ldr(temp, HeapOperand(temp, super_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+      // If the class reference currently in `temp` is not null, jump
+      // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
-      // Jump to the slow path to throw the exception.
-      __ B(slow_path->GetEntryLabel());
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kArrayObjectCheck: {
       // Do an exact check.
+      vixl::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
-      // Otherwise, we need to check that the object's class is a non primitive array.
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      Location temp2_loc =
+          kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+      if (kEmitCompilerReadBarrier) {
+        // Save the value of `temp` into `temp2` before overwriting it
+        // in the following move operation, as we will need it for the
+        // read barrier below.
+        Register temp2 = WRegisterFrom(temp2_loc);
+        __ Mov(temp2, temp);
+      }
+      // /* HeapReference<Class> */ temp = temp->component_type_
       __ Ldr(temp, HeapOperand(temp, component_offset));
-      GetAssembler()->MaybeUnpoisonHeapReference(temp);
-      __ Cbz(temp, slow_path->GetEntryLabel());
+      codegen_->MaybeGenerateReadBarrier(
+          instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+      // If the component type is not null (i.e. the object is indeed
+      // an array), jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive
+      // type.
+      __ Cbnz(temp, &check_non_primitive_component_type);
+      // Otherwise, jump to the slow path to throw the exception.
+      //
+      // But before, move back the object's class into `temp` before
+      // going into the slow path, as it has been overwritten in the
+      // meantime.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
+
+      __ Bind(&check_non_primitive_component_type);
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbnz(temp, slow_path->GetEntryLabel());
+      __ Cbz(temp, &done);
+      // Same comment as above regarding `temp` and the slow path.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      __ Ldr(temp, HeapOperand(obj, class_offset));
+      codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
+
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-    default:
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
-                              instruction,
-                              instruction->GetDexPc(),
-                              nullptr);
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require to
+      // assign fixed registers for the inputs of this HInstanceOf
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ B(type_check_slow_path->GetEntryLabel());
       break;
   }
   __ Bind(&done);
 
-  if (slow_path != nullptr) {
-    __ Bind(slow_path->GetExitLabel());
-  }
+  __ Bind(type_check_slow_path->GetExitLabel());
 }
 
 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
@@ -2870,10 +3421,11 @@
 
 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  LocationSummary* locations = invoke->GetLocations();
+  Register temp = XRegisterFrom(locations->GetTemp(0));
   uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
       invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
-  Location receiver = invoke->GetLocations()->InAt(0);
+  Location receiver = locations->InAt(0);
   Offset class_offset = mirror::Object::ClassOffset();
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
 
@@ -2885,14 +3437,22 @@
   scratch_scope.Exclude(ip1);
   __ Mov(ip1, invoke->GetDexMethodIndex());
 
-  // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ Ldr(temp.W(), StackOperandFrom(receiver));
+    // /* HeapReference<Class> */ temp = temp->klass_
     __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
   } else {
+    // /* HeapReference<Class> */ temp = receiver->klass_
     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -3014,7 +3574,7 @@
         __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
       }
 
-      // temp = current_method->dex_cache_resolved_methods_;
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
       __ Ldr(reg.X(),
              MemOperand(method_reg.X(),
                         ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
@@ -3058,8 +3618,12 @@
 }
 
 void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   Register temp = XRegisterFrom(temp_in);
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -3069,8 +3633,15 @@
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   DCHECK(receiver.IsRegister());
-  __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
@@ -3183,7 +3754,8 @@
   CodeGenerator::CreateLoadClassLocationSummary(
       cls,
       LocationFrom(calling_convention.GetRegisterAt(0)),
-      LocationFrom(vixl::x0));
+      LocationFrom(vixl::x0),
+      /* code_generator_supports_read_barrier */ true);
 }
 
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3197,17 +3769,38 @@
     return;
   }
 
+  Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
   Register current_method = InputRegisterAt(cls, 0);
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
-    __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+    uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+      __ Add(out.X(), current_method.X(), declaring_class_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+    }
   } else {
     MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+    // /* GcRoot<mirror::Class>[] */ out =
+    //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
     __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
-    // TODO: We will need a read barrier here.
+
+    size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+    if (kEmitCompilerReadBarrier) {
+      // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+      __ Add(out.X(), out.X(), cache_offset);
+      // /* mirror::Class* */ out = out->Read()
+      codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+    } else {
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      __ Ldr(out, MemOperand(out.X(), cache_offset));
+    }
 
     if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
       DCHECK(cls->CanCallRuntime());
@@ -3267,12 +3860,35 @@
   SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
   codegen_->AddSlowPath(slow_path);
 
+  Location out_loc = load->GetLocations()->Out();
   Register out = OutputRegister(load);
   Register current_method = InputRegisterAt(load, 0);
-  __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
-  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
-  // TODO: We will need a read barrier here.
+
+  uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+    __ Add(out.X(), current_method.X(), declaring_class_offset);
+    // /* mirror::Class* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+    __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+  }
+
+  // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+  __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+  size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+  if (kEmitCompilerReadBarrier) {
+    // /* GcRoot<mirror::String>* */ out = &out[string_index]
+    __ Add(out.X(), out.X(), cache_offset);
+    // /* mirror::String* */ out = out->Read()
+    codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+  } else {
+    // /* GcRoot<mirror::String> */ out = out[string_index]
+    __ Ldr(out, MemOperand(out.X(), cache_offset));
+  }
+
   __ Cbz(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -3852,9 +4468,7 @@
     int min_size = std::min(result_size, input_size);
     Register output = OutputRegister(conversion);
     Register source = InputRegisterAt(conversion, 0);
-    if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
-      __ Ubfx(output, source, 0, result_size * kBitsPerByte);
-    } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
       // 'int' values are used directly as W registers, discarding the top
       // bits, so we don't need to sign-extend and can just perform a move.
       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
@@ -3863,9 +4477,11 @@
       // 32bit input value as a 64bit value assuming that the top 32 bits are
       // zero.
       __ Mov(output.W(), source.W());
-    } else if ((result_type == Primitive::kPrimChar) ||
-               ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      __ Ubfx(output,
+              output.IsX() ? source.X() : source.W(),
+              0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
     } else {
       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
     }
@@ -3942,20 +4558,29 @@
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries < kPackedSwitchJumpTableThreshold ||
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      int32_t case_value = lower_bound + i;
-      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-      if (case_value == 0) {
-        __ Cbz(value_reg, succ);
-      } else {
-        __ Cmp(value_reg, Operand(case_value));
-        __ B(eq, succ);
-      }
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
     }
 
     // And the default for any other value.
@@ -4000,6 +4625,82 @@
   }
 }
 
+void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Location ref,
+                                             Location obj,
+                                             uint32_t offset,
+                                             Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  // TODO: When read barrier has a fast path, add it here.
+  /* Currently the read barrier call is inserted after the original load.
+   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+   * original load. This load-load ordering is required by the read barrier.
+   * The fast path/slow path (for Baker's algorithm) should look like:
+   *
+   * bool isGray = obj.LockWord & kReadBarrierMask;
+   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
+   * ref = obj.field;    // this is the original load
+   * if (isGray) {
+   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
+   * }
+   */
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
+  }
+}
+
+void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
+                                                    Location out,
+                                                    Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  // TODO: Implement a fast path for ReadBarrierForRoot, performing
+  // the following operation (for Baker's algorithm):
+  //
+  //   if (thread.tls32_.is_gc_marking) {
+  //     root = Mark(root);
+  //   }
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 881afcc..7950f07 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -424,6 +424,51 @@
 
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
 
+  // Generate a read barrier for a heap reference within `instruction`.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrier(HInstruction* instruction,
+                           Location out,
+                           Location ref,
+                           Location obj,
+                           uint32_t offset,
+                           Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap reference.
+  // If heap poisoning is enabled, also unpoison the reference in `out`.
+  void MaybeGenerateReadBarrier(HInstruction* instruction,
+                                Location out,
+                                Location ref,
+                                Location obj,
+                                uint32_t offset,
+                                Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction`.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
  private:
   using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
   using MethodToLiteralMap = ArenaSafeMap<MethodReference,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 65af9ee..d092de9 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4394,19 +4394,31 @@
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to default if index is negative
+  // Note: We don't check the case that index is positive while value < lower_bound, because in
+  // this case, index >= num_entries must be true. So that we can save one branch instruction.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
-  // Insert the default branch for every other value.
+  // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 934f24b..78f5644 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2986,8 +2986,13 @@
 }
 
 void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  GpuRegister receiver = calling_convention.GetRegisterAt(0);
+
   GpuRegister temp = temp_location.AsRegister<GpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
@@ -2995,8 +3000,7 @@
   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
 
   // temp = object->GetClass();
-  DCHECK(receiver.IsRegister());
-  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
+  __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
@@ -3971,17 +3975,34 @@
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to default if index is negative
+  // Note: We don't check the case that index is positive while value < lower_bound, because in
+  // this case, index >= num_entries must be true. So that we can save one branch instruction.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1fc09a8..19f03df 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = EAX;
-
 static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
 
 static constexpr int kC2ConditionMask = 0x400;
@@ -1929,8 +1928,7 @@
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
   if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(),
-                                    Location::RequiresRegister());
+    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 
   if (codegen_->IsBaseline()) {
@@ -1970,6 +1968,11 @@
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
@@ -4151,12 +4154,16 @@
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+  __ movl(temp, Address(receiver, class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -6418,31 +6425,67 @@
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  Register value_reg = locations->InAt(0).AsRegister<Register>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+                                                              int32_t lower_bound,
+                                                              uint32_t num_entries,
+                                                              HBasicBlock* switch_block,
+                                                              HBasicBlock* default_block) {
+  // Figure out the correct compare values and jump conditions.
+  // Handle the first compare/branch as a special case because it might
+  // jump to the default case.
+  DCHECK_GT(num_entries, 2u);
+  Condition first_condition;
+  uint32_t index;
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  if (lower_bound != 0) {
+    first_condition = kLess;
+    __ cmpl(value_reg, Immediate(lower_bound));
+    __ j(first_condition, codegen_->GetLabelOf(default_block));
+    __ j(kEqual, codegen_->GetLabelOf(successors[0]));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+    index = 1;
+  } else {
+    // Handle all the compare/jumps below.
+    first_condition = kBelow;
+    index = 0;
+  }
+
+  // Handle the rest of the compare/jumps.
+  for (; index + 1 < num_entries; index += 2) {
+    int32_t compare_to_value = lower_bound + index + 1;
+    __ cmpl(value_reg, Immediate(compare_to_value));
+    // Jump to successors[index] if value < case_value[index].
+    __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+    // Jump to successors[index + 1] if value == case_value[index + 1].
+    __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+  }
+
+  if (index != num_entries) {
+    // There are an odd number of entries. Handle the last one.
+    DCHECK_EQ(index + 1, num_entries);
+    __ cmpl(value_reg, Immediate(lower_bound + index));
+    __ j(kEqual, codegen_->GetLabelOf(successors[index]));
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+    __ jmp(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+  GenPackedSwitchWithCompares(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_instr->GetBlock(),
+                              switch_instr->GetDefaultBlock());
+}
+
 void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6457,11 +6500,20 @@
 
 void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_instr->GetBlock(),
+                                default_block);
+    return;
+  }
+
   // Optimizing has a jump area.
   Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
   Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6473,7 +6525,7 @@
   }
 
   // Is the value in range?
-  DCHECK_GE(num_entries, 1);
+  DCHECK_GE(num_entries, 1u);
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 064051c..f9403a6 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -195,6 +195,11 @@
 
   X86Assembler* GetAssembler() const { return assembler_; }
 
+  // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+  // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+  // generates less code/data with a small num_entries.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
@@ -236,6 +241,11 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 534ee1c..44a51ea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+// generates less code/data with a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
 
 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -802,12 +806,17 @@
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
-  LocationSummary* locations = invoke->GetLocations();
-  Location receiver = locations->InAt(0);
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConvention calling_convention;
+  Register receiver = calling_convention.GetRegisterAt(0);
+
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
-  DCHECK(receiver.IsRegister());
   // /* HeapReference<Class> */ temp = receiver->klass_
-  __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+  __ movl(temp, Address(CpuRegister(receiver), class_offset));
   MaybeRecordImplicitNullCheck(invoke);
   // Instead of simply (possibly) unpoisoning `temp` here, we should
   // emit a read barrier for the previous class reference load.
@@ -6016,11 +6025,58 @@
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
+  uint32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
   CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
   CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
   CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  // Should we generate smaller inline compare/jumps?
+  if (num_entries <= kPackedSwitchJumpTableThreshold) {
+    // Figure out the correct compare values and jump conditions.
+    // Handle the first compare/branch as a special case because it might
+    // jump to the default case.
+    DCHECK_GT(num_entries, 2u);
+    Condition first_condition;
+    uint32_t index;
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    if (lower_bound != 0) {
+      first_condition = kLess;
+      __ cmpl(value_reg_in, Immediate(lower_bound));
+      __ j(first_condition, codegen_->GetLabelOf(default_block));
+      __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+      index = 1;
+    } else {
+      // Handle all the compare/jumps below.
+      first_condition = kBelow;
+      index = 0;
+    }
+
+    // Handle the rest of the compare/jumps.
+    for (; index + 1 < num_entries; index += 2) {
+      int32_t compare_to_value = lower_bound + index + 1;
+      __ cmpl(value_reg_in, Immediate(compare_to_value));
+      // Jump to successors[index] if value < case_value[index].
+      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+      // Jump to successors[index + 1] if value == case_value[index + 1].
+      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+    }
+
+    if (index != num_entries) {
+      // There are an odd number of entries. Handle the last one.
+      DCHECK_EQ(index + 1, num_entries);
+      __ cmpl(value_reg_in, Immediate(lower_bound + index));
+      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ jmp(codegen_->GetLabelOf(default_block));
+    }
+    return;
+  }
 
   // Remove the bias, if needed.
   Register value_reg_out = value_reg_in.AsRegister();
@@ -6031,7 +6087,6 @@
   CpuRegister value_reg(value_reg_out);
 
   // Is the value in range?
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
   __ cmpl(value_reg, Immediate(num_entries - 1));
   __ j(kAbove, codegen_->GetLabelOf(default_block));
 
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index e1a8c9c..af8b8b5 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
 
+#include "code_generator.h"
 #include "locations.h"
 #include "nodes.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -255,6 +256,67 @@
   return true;
 }
 
+static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
+    case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
+    case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_SHIFT;
+  }
+}
+
+static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+  switch (op_kind) {
+    case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
+    case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
+    case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
+    case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
+    case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
+    case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+    default:
+      LOG(FATAL) << "Unexpected op kind " << op_kind;
+      UNREACHABLE();
+      return vixl::NO_EXTEND;
+  }
+}
+
+static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+  if (instruction->IsTypeConversion()) {
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    Primitive::Type result_type = conversion->GetResultType();
+    Primitive::Type input_type = conversion->GetInputType();
+    // We don't expect to see the same type as input and result.
+    return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+        (result_type != input_type);
+  } else {
+    return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+        (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+  }
+}
+
+static inline bool HasShifterOperand(HInstruction* instr) {
+  // `neg` instructions are an alias of `sub` using the zero register as the
+  // first register input.
+  bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
+      instr->IsOr() || instr->IsSub() || instr->IsXor();
+  return res;
+}
+
+static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+  DCHECK(HasShifterOperand(instruction));
+  // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
+  // does *not* support extension. This is because the `extended register` form
+  // of the `sub` instruction interprets the left register with code 31 as the
+  // stack pointer and not the zero register. (So does the `immediate` form.) In
+  // the other form `shifted register, the register with code 31 is interpreted
+  // as the zero register.
+  return instruction->IsAdd() || instruction->IsSub();
+}
+
 }  // namespace helpers
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000..6582063
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+  explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        dex_cache_array_bases_(std::less<const DexFile*>(),
+                               // Attribute memory use to code generator.
+                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+  void MoveBasesIfNeeded() {
+    for (const auto& entry : dex_cache_array_bases_) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      HArmDexCacheArraysBase* base = entry.second;
+      base->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
+ private:
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with PC-relative access to the dex cache methods array,
+    // we need to add the dex cache arrays base as the special input.
+    if (invoke->HasPcRelativeDexCache()) {
+      // Initialize base for target method dex file if needed.
+      MethodReference target_method = invoke->GetTargetMethod();
+      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+      // Update the element offset in base.
+      DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+      base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+      // Add the special argument base to the method.
+      DCHECK(!invoke->HasCurrentMethodInput());
+      invoke->AddSpecialInput(base);
+    }
+  }
+
+  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+    // Ensure we only initialize the pointer once for each dex file.
+    auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+    if (lb != dex_cache_array_bases_.end() &&
+        !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+      return lb->second;
+    }
+
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+    dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+    return base;
+  }
+
+  using DexCacheArraysBaseMap =
+      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+  DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+  DexCacheArrayFixupsVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
+  visitor.MoveBasesIfNeeded();
+}
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000..015f910
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+  DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+  void Run() OVERRIDE;
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 5814d75..c16b872 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -735,26 +735,31 @@
     }
   }
 
-  // Test phi equivalents. There should not be two of the same type and they
-  // should only be created for constants which were untyped in DEX.
-  for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
-    HPhi* other_phi = phi_it.Current()->AsPhi();
-    if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
-      if (phi->GetType() == other_phi->GetType()) {
-        std::stringstream type_str;
-        type_str << phi->GetType();
-        AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
-                              phi->GetId(),
-                              phi->GetRegNumber(),
-                              type_str.str().c_str()));
-      } else {
-        ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
-        if (!IsConstantEquivalent(phi, other_phi, &visited)) {
-          AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
-                                "are not equivalents of constants.",
+  // Test phi equivalents. There should not be two of the same type and they should only be
+  // created for constants which were untyped in DEX. Note that this test can be skipped for
+  // a synthetic phi (indicated by lack of a virtual register).
+  if (phi->GetRegNumber() != kNoRegNumber) {
+    for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+         !phi_it.Done();
+         phi_it.Advance()) {
+      HPhi* other_phi = phi_it.Current()->AsPhi();
+      if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+        if (phi->GetType() == other_phi->GetType()) {
+          std::stringstream type_str;
+          type_str << phi->GetType();
+          AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
                                 phi->GetId(),
-                                other_phi->GetId(),
-                                phi->GetRegNumber()));
+                                phi->GetRegNumber(),
+                                type_str.str().c_str()));
+        } else {
+          ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
+          if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+            AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+                                  "are not equivalents of constants.",
+                                  phi->GetId(),
+                                  other_phi->GetId(),
+                                  phi->GetRegNumber()));
+          }
         }
       }
     }
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4438190..e9fdb84 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -393,15 +393,18 @@
 
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     VisitInvoke(invoke);
-    StartAttributeStream("recursive") << std::boolalpha
-                                      << invoke->IsRecursive()
-                                      << std::noboolalpha;
+    StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
     StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
     if (invoke->IsStatic()) {
       StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
     }
   }
 
+  void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+    VisitInvoke(invoke);
+    StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+  }
+
   void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
     StartAttributeStream("field_type") << field_access->GetFieldType();
   }
@@ -423,6 +426,13 @@
   }
 
 #ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
+    if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+      StartAttributeStream("shift") << instruction->GetShiftAmount();
+    }
+  }
+
   void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
   }
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index fdf8cc9..0b7fdf8 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -705,7 +705,8 @@
       return loop_it->second;
     }
   }
-  if (loop->IsLoopInvariant(instruction, true)) {
+  if (loop->IsDefinedOutOfTheLoop(instruction)) {
+    DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader()));
     InductionInfo* info = CreateInvariantFetch(instruction);
     AssignInfo(loop, instruction, info);
     return info;
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index b40ef5a..9d0cde7 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -119,6 +119,17 @@
   }
 }
 
+bool InductionVarRange::RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val) {
+  Value v1 = RefineOuter(*min_val, /* is_min */ true);
+  Value v2 = RefineOuter(*max_val, /* is_min */ false);
+  if (v1.instruction != min_val->instruction || v2.instruction != max_val->instruction) {
+    *min_val = v1;
+    *max_val = v2;
+    return true;
+  }
+  return false;
+}
+
 bool InductionVarRange::CanGenerateCode(HInstruction* context,
                                         HInstruction* instruction,
                                         /*out*/bool* needs_finite_test,
@@ -202,6 +213,8 @@
     } else if (IsIntAndGet(instruction->InputAt(1), &value)) {
       return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(value));
     }
+  } else if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+    return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
   } else if (is_min) {
     // Special case for finding minimum: minimum of trip-count in loop-body is 1.
     if (trip != nullptr && in_body && instruction == trip->op_a->fetch) {
@@ -404,6 +417,25 @@
   return Value();
 }
 
+InductionVarRange::Value InductionVarRange::RefineOuter(Value v, bool is_min) {
+  if (v.instruction != nullptr) {
+    HLoopInformation* loop =
+        v.instruction->GetBlock()->GetLoopInformation();  // closest enveloping loop
+    if (loop != nullptr) {
+      // Set up loop information.
+      bool in_body = true;  // use is always in body of outer loop
+      HInductionVarAnalysis::InductionInfo* info =
+          induction_analysis_->LookupInfo(loop, v.instruction);
+      HInductionVarAnalysis::InductionInfo* trip =
+          induction_analysis_->LookupInfo(loop, loop->GetHeader()->GetLastInstruction());
+      // Try to refine "a x instruction + b" with outer loop range information on instruction.
+      return AddValue(MulValue(Value(v.a_constant), GetVal(info, trip, in_body, is_min)),
+                      Value(v.b_constant));
+    }
+  }
+  return v;
+}
+
 bool InductionVarRange::GenerateCode(HInstruction* context,
                                      HInstruction* instruction,
                                      HGraph* graph,
@@ -425,9 +457,13 @@
     }
     HInductionVarAnalysis::InductionInfo* trip =
         induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
-    // Determine what tests are needed.
+    // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop maybe unsafe (because in such cases, the index could "overshoot"
+    // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
+    // code does not use the trip-count explicitly (since there could be an implicit relation
+    // between e.g. an invariant subscript and a not-taken condition).
     *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
-    *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip);
+    *needs_taken_test = IsBodyTripCount(trip);
     // Code generation for taken test: generate the code when requested or otherwise analyze
     // if code generation is feasible when taken test is needed.
     if (taken_test != nullptr) {
@@ -512,10 +548,13 @@
             }
             break;
           case HInductionVarAnalysis::kFetch:
-            if (graph != nullptr) {
-              *result = info->fetch;  // already in HIR
+            if (info->fetch->GetType() == type) {
+              if (graph != nullptr) {
+                *result = info->fetch;  // already in HIR
+              }
+              return true;
             }
-            return true;
+            break;
           case HInductionVarAnalysis::kTripCountInLoop:
           case HInductionVarAnalysis::kTripCountInLoopUnsafe:
             if (!in_body && !is_min) {  // one extra!
@@ -545,29 +584,42 @@
         }
         break;
       case HInductionVarAnalysis::kLinear: {
-          // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
-          // to avoid arithmetic wrap-around situations that are hard to guard against.
-          int32_t stride_value = 0;
-          if (GetConstant(info->op_a, &stride_value)) {
-            if (stride_value == 1 || stride_value == -1) {
-              const bool is_min_a = stride_value == 1 ? is_min : !is_min;
-              if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
-                  GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
-                if (graph != nullptr) {
-                  HInstruction* oper;
-                  if (stride_value == 1) {
-                    oper = new (graph->GetArena()) HAdd(type, opa, opb);
-                  } else {
-                    oper = new (graph->GetArena()) HSub(type, opb, opa);
-                  }
-                  *result = Insert(block, oper);
+        // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
+        // to avoid arithmetic wrap-around situations that are hard to guard against.
+        int32_t stride_value = 0;
+        if (GetConstant(info->op_a, &stride_value)) {
+          if (stride_value == 1 || stride_value == -1) {
+            const bool is_min_a = stride_value == 1 ? is_min : !is_min;
+            if (GenerateCode(trip,       trip, graph, block, &opa, in_body, is_min_a) &&
+                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
+              if (graph != nullptr) {
+                HInstruction* oper;
+                if (stride_value == 1) {
+                  oper = new (graph->GetArena()) HAdd(type, opa, opb);
+                } else {
+                  oper = new (graph->GetArena()) HSub(type, opb, opa);
                 }
-                return true;
+                *result = Insert(block, oper);
               }
+              return true;
             }
           }
         }
         break;
+      }
+      case HInductionVarAnalysis::kWrapAround:
+      case HInductionVarAnalysis::kPeriodic: {
+        // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+        // values are easy to test at runtime without complications of arithmetic wrap-around.
+        Value extreme = GetVal(info, trip, in_body, is_min);
+        if (extreme.is_known && extreme.a_constant == 0) {
+          if (graph != nullptr) {
+            *result = graph->GetIntConstant(extreme.b_constant);
+          }
+          return true;
+        }
+        break;
+      }
       default:
         break;
     }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7984871..71b0b1b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -68,6 +68,9 @@
                          /*out*/Value* max_val,
                          /*out*/bool* needs_finite_test);
 
+  /** Refines the values with induction of next outer loop. Returns true on change. */
+  bool RefineOuter(/*in-out*/Value* min_val, /*in-out*/Value* max_val);
+
   /**
    * Returns true if range analysis is able to generate code for the lower and upper
    * bound expressions on the instruction in the given context. The need_finite_test
@@ -149,6 +152,12 @@
   static Value MergeVal(Value v1, Value v2, bool is_min);
 
   /**
+   * Returns refined value using induction of next outer loop or the input value if no
+   * further refinement is possible.
+   */
+  Value RefineOuter(Value val, bool is_min);
+
+  /**
    * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
    * With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index c2ba157..128b5bb 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -473,16 +473,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -498,16 +501,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -527,16 +533,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(0), v1);
   ExpectEqual(Value(parameter, 1, -1), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(parameter, 1, 0), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
@@ -597,16 +606,19 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   // In context of loop-body: known.
   range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 1), v1);
   ExpectEqual(Value(1000), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
   range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(parameter, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+  EXPECT_FALSE(range.RefineOuter(&v1, &v2));
 
   HInstruction* lower = nullptr;
   HInstruction* upper = nullptr;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 0363f20..a4dcb3a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -171,13 +171,37 @@
                                   const DexFile& dex_file,
                                   uint32_t referrer_index)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) {
+  if (IsSameDexFile(*method->GetDexFile(), dex_file)) {
     return method->GetDexMethodIndex();
   } else {
     return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index);
   }
 }
 
+static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (cls->GetDexCache() == nullptr) {
+    DCHECK(cls->IsArrayClass());
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
+    // TODO: deal with proxy classes.
+    return DexFile::kDexNoIndex;
+  } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
+    // Update the dex cache to ensure the class is in. The generated code will
+    // consider it is. We make it safe by updating the dex cache, as other
+    // dex files might also load the class, and there is no guarantee the dex
+    // cache of the dex file of the class will be updated.
+    if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) {
+      cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls);
+    }
+    return cls->GetDexTypeIndex();
+  } else {
+    // TODO: find the class in `dex_file`.
+    return DexFile::kDexNoIndex;
+  }
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved()) {
     return false;  // Don't bother to move further if we know the method is unresolved.
@@ -192,6 +216,10 @@
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+      VLOG(compiler) << "Not inlining a String.<init> method";
+      return false;
+    }
     MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
     mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
         ? caller_compilation_unit_.GetDexCache().Get()
@@ -210,53 +238,176 @@
     return false;
   }
 
-  if (!invoke_instruction->IsInvokeStaticOrDirect()) {
-    resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
-    if (resolved_method == nullptr) {
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
+    return TryInline(invoke_instruction, resolved_method);
+  }
+
+  // Check if we can statically find the method.
+  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+  if (actual_method != nullptr) {
+    return TryInline(invoke_instruction, actual_method);
+  }
+
+  // Check if we can use an inline cache.
+  ArtMethod* caller = graph_->GetArtMethod();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  // Under JIT, we should always know the caller.
+  DCHECK(!Runtime::Current()->UseJit() || (caller != nullptr));
+  if (caller != nullptr && caller->GetProfilingInfo(pointer_size) != nullptr) {
+    ProfilingInfo* profiling_info = caller->GetProfilingInfo(pointer_size);
+    const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
+    if (ic.IsUnitialized()) {
       VLOG(compiler) << "Interface or virtual call to "
                      << PrettyMethod(method_index, caller_dex_file)
-                     << " could not be statically determined";
+                     << " is not hit and not inlined";
       return false;
-    }
-    // We have found a method, but we need to find where that method is for the caller's
-    // dex file.
-    method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index);
-    if (method_index == DexFile::kDexNoIndex) {
+    } else if (ic.IsMonomorphic()) {
+      MaybeRecordStat(kMonomorphicCall);
+      return TryInlineMonomorphicCall(invoke_instruction, resolved_method, ic);
+    } else if (ic.IsPolymorphic()) {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, ic);
+    } else {
+      DCHECK(ic.IsMegamorphic());
       VLOG(compiler) << "Interface or virtual call to "
-                     << PrettyMethod(resolved_method)
-                     << " cannot be inlined because unaccessible to caller";
+                     << PrettyMethod(method_index, caller_dex_file)
+                     << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
       return false;
     }
   }
 
-  bool same_dex_file =
-      IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *resolved_method->GetDexFile());
+  VLOG(compiler) << "Interface or virtual call to "
+                 << PrettyMethod(method_index, caller_dex_file)
+                 << " could not be statically determined";
+  return false;
+}
 
-  const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
+  if (class_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because its class is not"
+                   << " accessible to the caller";
+    return false;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForInterface(
+        resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = ic.GetMonomorphicType()->FindVirtualMethodForVirtual(
+        resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      invoke_instruction->GetDexPc());
+
+  bool is_referrer =
+      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(load_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(field_get, load_class);
+  bb_cursor->InsertInstructionAfter(compare, field_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed, and eventually propagate the
+  // type of the receiver.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedMonomorphicCall);
+  return true;
+}
+
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+                                        ArtMethod* resolved_method,
+                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
+  // TODO
+  VLOG(compiler) << "Unimplemented polymorphic inlining for "
+                 << PrettyMethod(resolved_method);
+  return false;
+}
+
+bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  uint32_t method_index = FindMethodIndexIn(
+      method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
+  if (method_index == DexFile::kDexNoIndex) {
+    VLOG(compiler) << "Call to "
+                   << PrettyMethod(method)
+                   << " cannot be inlined because unaccessible to caller";
+    return false;
+  }
+
+  bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile());
+
+  const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is too big to inline";
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method)
                    << " is not inlined because of try block";
     return false;
   }
 
-  if (!resolved_method->GetDeclaringClass()->IsVerified()) {
-    uint16_t class_def_idx = resolved_method->GetDeclaringClass()->GetDexClassDefIndex();
+  if (!method->GetDeclaringClass()->IsVerified()) {
+    uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
     if (!compiler_driver_->IsMethodVerifiedWithoutFailures(
-          resolved_method->GetDexMethodIndex(), class_def_idx, *resolved_method->GetDexFile())) {
+          method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
       VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
                      << " couldn't be verified, so it cannot be inlined";
       return false;
@@ -273,7 +424,7 @@
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
+  if (!TryBuildAndInline(method, invoke_instruction, same_dex_file, do_rtp)) {
     return false;
   }
 
@@ -284,7 +435,8 @@
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 bool same_dex_file) {
+                                 bool same_dex_file,
+                                 bool do_rtp) {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -337,6 +489,7 @@
       invoke_type,
       graph_->IsDebuggable(),
       graph_->GetCurrentInstructionId());
+  callee_graph->SetArtMethod(resolved_method);
 
   OptimizingCompilerStats inline_stats;
   HGraphBuilder builder(callee_graph,
@@ -418,6 +571,7 @@
   size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
   if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
     HInliner inliner(callee_graph,
+                     outermost_graph_,
                      codegen_,
                      outer_compilation_unit_,
                      dex_compilation_unit,
@@ -529,9 +683,9 @@
   HNullConstant* null_constant = graph_->GetNullConstant();
   if (!null_constant->GetReferenceTypeInfo().IsValid()) {
     ReferenceTypeInfo::TypeHandle obj_handle =
-            handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
+        handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject));
     null_constant->SetReferenceTypeInfo(
-            ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
+        ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
   }
 
   // Check the integrity of reference types and run another type propagation if needed.
@@ -550,14 +704,16 @@
          return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
     }
 
-    // If the return type is a refinement of the declared type run the type propagation again.
-    ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
-    ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
-    if (invoke_rti.IsStrictSupertypeOf(return_rti)
-        || (return_rti.IsExact() && !invoke_rti.IsExact())
-        || !return_replacement->CanBeNull()) {
-      ReferenceTypePropagation rtp_fixup(graph_, handles_);
-      rtp_fixup.Run();
+    if (do_rtp) {
+      // If the return type is a refinement of the declared type run the type propagation again.
+      ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+      ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+      if (invoke_rti.IsStrictSupertypeOf(return_rti)
+          || (return_rti.IsExact() && !invoke_rti.IsExact())
+          || !return_replacement->CanBeNull()) {
+        ReferenceTypePropagation rtp_fixup(graph_, handles_);
+        rtp_fixup.Run();
+      }
     }
   }
 
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0f6a945..7b9fb73 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -27,11 +27,13 @@
 class DexCompilationUnit;
 class HGraph;
 class HInvoke;
+class InlineCache;
 class OptimizingCompilerStats;
 
 class HInliner : public HOptimization {
  public:
   HInliner(HGraph* outer_graph,
+           HGraph* outermost_graph,
            CodeGenerator* codegen,
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
@@ -40,6 +42,7 @@
            OptimizingCompilerStats* stats,
            size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
+        outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
         caller_compilation_unit_(caller_compilation_unit),
         codegen_(codegen),
@@ -54,10 +57,33 @@
 
  private:
   bool TryInline(HInvoke* invoke_instruction);
+
+  // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
+  // reference type propagation can run after the inlining.
+  bool TryInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp = true)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline the target of a monomorphic call. If successful, the code
+  // in the graph will look like:
+  // if (receiver.getClass() != ic.GetMonomorphicType()) deopt
+  // ... // inlined code
+  bool TryInlineMonomorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to inline targets of a polymorphic call. Currently unimplemented.
+  bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,
+                                ArtMethod* resolved_method,
+                                const InlineCache& ic)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         bool same_dex_file);
+                         bool same_dex_file,
+                         bool do_rtp = true);
 
+  HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 54dd2cc..6a34b13 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,11 +16,16 @@
 
 #include "instruction_simplifier_arm64.h"
 
+#include "common_arm64.h"
 #include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
 
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+using helpers::ShifterOperandSupportsExtension;
+
 void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
                                                                      HInstruction* array,
                                                                      HInstruction* index,
@@ -62,6 +67,108 @@
   RecordSimplification();
 }
 
+bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
+                                                                   HInstruction* bitfield_op,
+                                                                   bool do_merge) {
+  DCHECK(HasShifterOperand(use));
+  DCHECK(use->IsBinaryOperation() || use->IsNeg());
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+  DCHECK(!bitfield_op->HasEnvironmentUses());
+
+  Primitive::Type type = use->GetType();
+  if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+    return false;
+  }
+
+  HInstruction* left;
+  HInstruction* right;
+  if (use->IsBinaryOperation()) {
+    left = use->InputAt(0);
+    right = use->InputAt(1);
+  } else {
+    DCHECK(use->IsNeg());
+    right = use->AsNeg()->InputAt(0);
+    left = GetGraph()->GetConstant(right->GetType(), 0);
+  }
+  DCHECK(left == bitfield_op || right == bitfield_op);
+
+  if (left == right) {
+    // TODO: Handle special transformations in this situation?
+    // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+    // Or should this be part of a separate transformation logic?
+    return false;
+  }
+
+  bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative();
+  HInstruction* other_input;
+  if (bitfield_op == right) {
+    other_input = left;
+  } else {
+    if (is_commutative) {
+      other_input = right;
+    } else {
+      return false;
+    }
+  }
+
+  HArm64DataProcWithShifterOp::OpKind op_kind;
+  int shift_amount = 0;
+  HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+
+  if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
+      !ShifterOperandSupportsExtension(use)) {
+    return false;
+  }
+
+  if (do_merge) {
+    HArm64DataProcWithShifterOp* alu_with_op =
+        new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
+                                                                 other_input,
+                                                                 bitfield_op->InputAt(0),
+                                                                 op_kind,
+                                                                 shift_amount,
+                                                                 use->GetDexPc());
+    use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+    if (bitfield_op->GetUses().IsEmpty()) {
+      bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+    }
+    RecordSimplification();
+  }
+
+  return true;
+}
+
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+  DCHECK(CanFitInShifterOperand(bitfield_op));
+
+  if (bitfield_op->HasEnvironmentUses()) {
+    return false;
+  }
+
+  const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+  // Check whether we can merge the instruction in all its users' shifter operand.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    if (!HasShifterOperand(use)) {
+      return false;
+    }
+    if (!CanMergeIntoShifterOperand(use, bitfield_op)) {
+      return false;
+    }
+  }
+
+  // Merge the instruction into its uses.
+  for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+    HInstruction* use = it_use.Current()->GetUser();
+    bool merged = MergeIntoShifterOperand(use, bitfield_op);
+    DCHECK(merged);
+  }
+
+  return true;
+}
+
 bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
     HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
   DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
@@ -210,5 +317,37 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
+  Primitive::Type result_type = instruction->GetResultType();
+  Primitive::Type input_type = instruction->GetInputType();
+
+  if (input_type == result_type) {
+    // We let the arch-independent code handle this.
+    return;
+  }
+
+  if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
+void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    TryMergeIntoUsersShifterOperand(instruction);
+  }
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eed2276..b7f490b 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -39,6 +39,17 @@
                                     HInstruction* array,
                                     HInstruction* index,
                                     int access_size);
+  bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+  bool TryMergeIntoShifterOperand(HInstruction* use,
+                                  HInstruction* bitfield_op,
+                                  bool do_merge);
+  bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    return TryMergeIntoShifterOperand(use, bitfield_op, false);
+  }
+  bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+    DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+    return TryMergeIntoShifterOperand(use, bitfield_op, true);
+  }
 
   bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
                                            HBinaryOperation* input_binop,
@@ -48,6 +59,10 @@
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitShl(HShl* instruction) OVERRIDE;
+  void VisitShr(HShr* instruction) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+  void VisitUShr(HUShr* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b01324e..8340811 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -384,7 +384,7 @@
   // InvokeStaticOrDirect.
   InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
   InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
-      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
       invoke->IsInvokeVirtual() ? kVirtual : kSuper;
   switch (intrinsic_type) {
     case kStatic:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index d2017da..5329b5c 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -113,10 +113,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -127,10 +127,10 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -358,7 +358,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
@@ -377,7 +377,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
 }
 
 void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
@@ -390,7 +390,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
@@ -409,7 +409,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
 }
 
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -429,7 +429,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
@@ -437,7 +437,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -486,7 +486,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 
@@ -495,7 +495,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -526,7 +526,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -534,7 +534,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
@@ -742,22 +742,22 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
@@ -787,31 +787,34 @@
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke);
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke);
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
 }
 
 static void GenUnsafePut(LocationSummary* locations,
@@ -873,31 +876,67 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
@@ -1245,7 +1284,8 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1265,7 +1305,8 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1644,7 +1685,7 @@
                        temp2,
                        dest,
                        Register(kNoRegister),
-                       false);
+                       /* can_be_null */ false);
 
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 059abf0..962c4d5 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -143,7 +143,23 @@
 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
-  return res != nullptr && res->Intrinsified();
+  if (res == nullptr) {
+    return false;
+  }
+  if (kEmitCompilerReadBarrier && res->CanCall()) {
+    // Generating an intrinsic for this HInvoke may produce an
+    // IntrinsicSlowPathARM64 slow path.  Currently this approach
+    // does not work when using read barriers, as the emitted
+    // calling sequence will make use of another slow path
+    // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
+    // ReadBarrierSlowPathARM64 for HInvokeVirtual).  So we bail
+    // out in this case.
+    //
+    // TODO: Find a way to have intrinsics work with read barriers.
+    invoke->SetLocations(nullptr);
+    return false;
+  }
+  return res->Intrinsified();
 }
 
 #define __ masm->
@@ -186,10 +202,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -200,10 +216,10 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -461,7 +477,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -469,7 +485,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetVIXLAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -498,7 +514,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
@@ -506,7 +522,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -541,7 +557,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -549,7 +565,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -557,7 +573,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -565,7 +581,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -598,7 +615,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -606,7 +623,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -614,7 +631,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -622,7 +639,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetVIXLAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
@@ -698,7 +715,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), true, GetVIXLAssembler());
+  GenMathRound(invoke->GetLocations(), /* is_double */ true, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -706,7 +723,7 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
-  GenMathRound(invoke->GetLocations(), false, GetVIXLAssembler());
+  GenMathRound(invoke->GetLocations(), /* is_double */ false, GetVIXLAssembler());
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -818,9 +835,12 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
   vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
-  Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
-  Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
-  Register trg = RegisterFrom(locations->Out(), type);
+  Location base_loc = locations->InAt(1);
+  Register base = WRegisterFrom(base_loc);      // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = XRegisterFrom(offset_loc);  // Long offset.
+  Location trg_loc = locations->Out();
+  Register trg = RegisterFrom(trg_loc, type);
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
@@ -837,13 +857,18 @@
 
   if (type == Primitive::kPrimNot) {
     DCHECK(trg.IsW());
-    codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+    codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
   }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           can_call ?
+                                                               LocationSummary::kCallOnSlowPath :
+                                                               LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -871,22 +896,22 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -977,31 +1002,67 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1057,6 +1118,9 @@
   if (use_acquire_release) {
     __ Bind(&loop_head);
     __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1065,6 +1129,9 @@
     __ Dmb(InnerShareable, BarrierWrites);
     __ Bind(&loop_head);
     __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+    // Note that this code is not (yet) used when read barriers are
+    // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
     __ Cmp(tmp_value, expected);
     __ B(&exit_loop, ne);
     __ Stxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1090,7 +1157,11 @@
   // The UnsafeCASObject intrinsic does not always work when heap
   // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
   // off temporarily as a quick fix.
+  //
   // TODO(rpl): Fix it and turn it back on.
+  //
+  // TODO(rpl): Also, we should investigate whether we need a read
+  // barrier in the generated code.
   if (kPoisonHeapReferences) {
     return;
   }
@@ -1345,7 +1416,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1365,7 +1437,8 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+  GenerateVisitStringIndexOf(
+      invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 3268445..9ecce0e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -43,6 +43,14 @@
   return codegen_->GetGraph()->GetArena();
 }
 
+inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() {
+  return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+}
+
+inline bool IntrinsicCodeGeneratorMIPS::IsR6() {
+  return codegen_->GetInstructionSetFeatures().IsR6();
+}
+
 #define __ codegen->GetAssembler()->
 
 static void MoveFromReturnRegister(Location trg,
@@ -168,7 +176,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // int java.lang.Float.floatToRawIntBits(float)
@@ -177,7 +185,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -210,7 +218,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Float.intBitsToFloat(int)
@@ -219,24 +227,29 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
-static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntToIntLocations(ArenaAllocator* arena,
+                                    HInvoke* invoke,
+                                    Location::OutputOverlap overlaps = Location::kNoOutputOverlap) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(), overlaps);
 }
 
-static void GenReverseBytes(LocationSummary* locations,
-                            Primitive::Type type,
-                            MipsAssembler* assembler,
-                            bool isR2OrNewer) {
+static void GenReverse(LocationSummary* locations,
+                       Primitive::Type type,
+                       bool isR2OrNewer,
+                       bool isR6,
+                       bool reverseBits,
+                       MipsAssembler* assembler) {
   DCHECK(type == Primitive::kPrimShort ||
          type == Primitive::kPrimInt ||
          type == Primitive::kPrimLong);
+  DCHECK(type != Primitive::kPrimShort || !reverseBits);
 
   if (type == Primitive::kPrimShort) {
     Register in = locations->InAt(0).AsRegister<Register>();
@@ -273,6 +286,30 @@
       __ And(out, out, AT);
       __ Or(out, out, TMP);
     }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out, out);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out, out, 4);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out, out, 2);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out, out, 1);
+        __ And(out, out, AT);
+        __ Or(out, TMP, out);
+      }
+    }
   } else if (type == Primitive::kPrimLong) {
     Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
@@ -314,6 +351,46 @@
       __ And(out_lo, out_lo, AT);
       __ Or(out_lo, out_lo, TMP);
     }
+    if (reverseBits) {
+      if (isR6) {
+        __ Bitswap(out_hi, out_hi);
+        __ Bitswap(out_lo, out_lo);
+      } else {
+        __ LoadConst32(AT, 0x0F0F0F0F);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_hi, out_hi, 4);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 4);
+        __ Srl(out_lo, out_lo, 4);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x33333333);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_hi, out_hi, 2);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 2);
+        __ Srl(out_lo, out_lo, 2);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+        __ LoadConst32(AT, 0x55555555);
+        __ And(TMP, out_hi, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_hi, out_hi, 1);
+        __ And(out_hi, out_hi, AT);
+        __ Or(out_hi, TMP, out_hi);
+        __ And(TMP, out_lo, AT);
+        __ Sll(TMP, TMP, 1);
+        __ Srl(out_lo, out_lo, 1);
+        __ And(out_lo, out_lo, AT);
+        __ Or(out_lo, TMP, out_lo);
+      }
+    }
   }
 }
 
@@ -323,10 +400,12 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimInt,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
 }
 
 // long java.lang.Long.reverseBytes(long)
@@ -335,10 +414,12 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimLong,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
 }
 
 // short java.lang.Short.reverseBytes(short)
@@ -347,10 +428,397 @@
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations(),
-                  Primitive::kPrimShort,
-                  GetAssembler(),
-                  codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimShort,
+             IsR2OrNewer(),
+             IsR6(),
+             false,
+             GetAssembler());
+}
+
+static void GenNumberOfLeadingZeroes(LocationSummary* locations,
+                                     bool is64bit,
+                                     bool isR6,
+                                     MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  if (is64bit) {
+    Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    if (isR6) {
+      __ ClzR6(AT, in_hi);
+      __ ClzR6(TMP, in_lo);
+      __ Seleqz(TMP, TMP, in_hi);
+    } else {
+      __ ClzR2(AT, in_hi);
+      __ ClzR2(TMP, in_lo);
+      __ Movn(TMP, ZERO, in_hi);
+    }
+    __ Addu(out, AT, TMP);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+
+    if (isR6) {
+      __ ClzR6(out, in);
+    } else {
+      __ ClzR2(out, in);
+    }
+  }
+}
+
+// int java.lang.Integer.numberOfLeadingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, IsR6(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfLeadingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, IsR6(), GetAssembler());
+}
+
+static void GenNumberOfTrailingZeroes(LocationSummary* locations,
+                                      bool is64bit,
+                                      bool isR6,
+                                      bool isR2OrNewer,
+                                      MipsAssembler* assembler) {
+  Register out = locations->Out().AsRegister<Register>();
+  Register in_lo;
+  Register in;
+
+  if (is64bit) {
+    MipsLabel done;
+    Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+    in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+
+    // If in_lo is zero then count the number of trailing zeroes in in_hi;
+    // otherwise count the number of trailing zeroes in in_lo.
+    // AT = in_lo ? in_lo : in_hi;
+    if (isR6) {
+      __ Seleqz(out, in_hi, in_lo);
+      __ Selnez(TMP, in_lo, in_lo);
+      __ Or(out, out, TMP);
+    } else {
+      __ Movz(out, in_hi, in_lo);
+      __ Movn(out, in_lo, in_lo);
+    }
+
+    in = out;
+  } else {
+    in = locations->InAt(0).AsRegister<Register>();
+    // Give in_lo a dummy value to keep the compiler from complaining.
+    // Since we only get here in the 32-bit case, this value will never
+    // be used.
+    in_lo = in;
+  }
+
+  // We don't have an instruction to count the number of trailing zeroes.
+  // Start by flipping the bits end-for-end so we can count the number of
+  // leading zeroes instead.
+  if (isR2OrNewer) {
+    __ Rotr(out, in, 16);
+    __ Wsbh(out, out);
+  } else {
+    // MIPS32r1
+    // __ Rotr(out, in, 16);
+    __ Sll(TMP, in, 16);
+    __ Srl(out, in, 16);
+    __ Or(out, out, TMP);
+    // __ Wsbh(out, out);
+    __ LoadConst32(AT, 0x00FF00FF);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 8);
+    __ Srl(out, out, 8);
+    __ And(out, out, AT);
+    __ Or(out, out, TMP);
+  }
+
+  if (isR6) {
+    __ Bitswap(out, out);
+    __ ClzR6(out, out);
+  } else {
+    __ LoadConst32(AT, 0x0F0F0F0F);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 4);
+    __ Srl(out, out, 4);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ LoadConst32(AT, 0x33333333);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 2);
+    __ Srl(out, out, 2);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ LoadConst32(AT, 0x55555555);
+    __ And(TMP, out, AT);
+    __ Sll(TMP, TMP, 1);
+    __ Srl(out, out, 1);
+    __ And(out, out, AT);
+    __ Or(out, TMP, out);
+    __ ClzR2(out, out);
+  }
+
+  if (is64bit) {
+    // If in_lo is zero, then we counted the number of trailing zeroes in in_hi so we must add the
+    // number of trailing zeroes in in_lo (32) to get the correct final count
+    __ LoadConst32(TMP, 32);
+    if (isR6) {
+      __ Seleqz(TMP, TMP, in_lo);
+    } else {
+      __ Movn(TMP, ZERO, in_lo);
+    }
+    __ Addu(out, out, TMP);
+  }
+}
+
+// int java.lang.Integer.numberOfTrailingZeros(int i)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, IsR6(), IsR2OrNewer(), GetAssembler());
+}
+
+// int java.lang.Long.numberOfTrailingZeros(long i)
+void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, IsR6(), IsR2OrNewer(), GetAssembler());
+}
+
+enum RotationDirection {
+  kRotateRight,
+  kRotateLeft,
+};
+
+static void GenRotate(HInvoke* invoke,
+                      Primitive::Type type,
+                      bool isR2OrNewer,
+                      RotationDirection direction,
+                      MipsAssembler* assembler) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  LocationSummary* locations = invoke->GetLocations();
+  if (invoke->InputAt(1)->IsIntConstant()) {
+    int32_t shift = static_cast<int32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue());
+    if (type == Primitive::kPrimInt) {
+      Register in = locations->InAt(0).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
+      shift &= 0x1f;
+      if (direction == kRotateLeft) {
+        shift = (32 - shift) & 0x1F;
+      }
+
+      if (isR2OrNewer) {
+        if ((shift != 0) || (out != in)) {
+          __ Rotr(out, in, shift);
+        }
+      } else {
+        if (shift == 0) {
+          if (out != in) {
+            __ Move(out, in);
+          }
+        } else {
+          __ Srl(AT, in, shift);
+          __ Sll(out, in, 32 - shift);
+          __ Or(out, out, AT);
+        }
+      }
+    } else {    // Primitive::kPrimLong
+      Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+      shift &= 0x3f;
+      if (direction == kRotateLeft) {
+        shift = (64 - shift) & 0x3F;
+      }
+
+      if (shift == 0) {
+        __ Move(out_lo, in_lo);
+        __ Move(out_hi, in_hi);
+      } else if (shift == 32) {
+        __ Move(out_lo, in_hi);
+        __ Move(out_hi, in_lo);
+      } else if (shift < 32) {
+        __ Srl(AT, in_lo, shift);
+        __ Sll(out_lo, in_hi, 32 - shift);
+        __ Or(out_lo, out_lo, AT);
+        __ Srl(AT, in_hi, shift);
+        __ Sll(out_hi, in_lo, 32 - shift);
+        __ Or(out_hi, out_hi, AT);
+      } else {
+        __ Sll(AT, in_lo, 64 - shift);
+        __ Srl(out_lo, in_hi, shift - 32);
+        __ Or(out_lo, out_lo, AT);
+        __ Sll(AT, in_hi, 64 - shift);
+        __ Srl(out_hi, in_lo, shift - 32);
+        __ Or(out_hi, out_hi, AT);
+      }
+    }
+  } else {      // !invoke->InputAt(1)->IsIntConstant()
+    Register shamt = locations->InAt(1).AsRegister<Register>();
+    if (type == Primitive::kPrimInt) {
+      Register in = locations->InAt(0).AsRegister<Register>();
+      Register out = locations->Out().AsRegister<Register>();
+
+      if (isR2OrNewer) {
+        if (direction == kRotateRight) {
+          __ Rotrv(out, in, shamt);
+        } else {
+          // negu tmp, shamt
+          __ Subu(TMP, ZERO, shamt);
+          __ Rotrv(out, in, TMP);
+        }
+      } else {
+        if (direction == kRotateRight) {
+          __ Srlv(AT, in, shamt);
+          __ Subu(TMP, ZERO, shamt);
+          __ Sllv(out, in, TMP);
+          __ Or(out, out, AT);
+        } else {
+          __ Sllv(AT, in, shamt);
+          __ Subu(TMP, ZERO, shamt);
+          __ Srlv(out, in, TMP);
+          __ Or(out, out, AT);
+        }
+      }
+    } else {    // Primitive::kPrimLong
+      Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+      Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+      Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+      Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+      MipsLabel done;
+
+      if (direction == kRotateRight) {
+        __ Nor(TMP, ZERO, shamt);
+        __ Srlv(AT, in_lo, shamt);
+        __ Sll(out_lo, in_hi, 1);
+        __ Sllv(out_lo, out_lo, TMP);
+        __ Or(out_lo, out_lo, AT);
+        __ Srlv(AT, in_hi, shamt);
+        __ Sll(out_hi, in_lo, 1);
+        __ Sllv(out_hi, out_hi, TMP);
+        __ Or(out_hi, out_hi, AT);
+      } else {
+        __ Nor(TMP, ZERO, shamt);
+        __ Sllv(AT, in_lo, shamt);
+        __ Srl(out_lo, in_hi, 1);
+        __ Srlv(out_lo, out_lo, TMP);
+        __ Or(out_lo, out_lo, AT);
+        __ Sllv(AT, in_hi, shamt);
+        __ Srl(out_hi, in_lo, 1);
+        __ Srlv(out_hi, out_hi, TMP);
+        __ Or(out_hi, out_hi, AT);
+      }
+
+      __ Andi(TMP, shamt, 32);
+      __ Beqz(TMP, &done);
+      __ Move(TMP, out_hi);
+      __ Move(out_hi, out_lo);
+      __ Move(out_lo, TMP);
+
+      __ Bind(&done);
+    }
+  }
+}
+
+// int java.lang.Integer.rotateRight(int i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateRight, GetAssembler());
+}
+
+// long java.lang.Long.rotateRight(long i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongRotateRight(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimLong, IsR2OrNewer(), kRotateRight, GetAssembler());
+}
+
+// int java.lang.Integer.rotateLeft(int i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimInt, IsR2OrNewer(), kRotateLeft, GetAssembler());
+}
+
+// long java.lang.Long.rotateLeft(long i, int distance)
+void IntrinsicLocationsBuilderMIPS::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongRotateLeft(HInvoke* invoke) {
+  GenRotate(invoke, Primitive::kPrimLong, IsR2OrNewer(), kRotateLeft, GetAssembler());
+}
+
+// int java.lang.Integer.reverse(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimInt,
+             IsR2OrNewer(),
+             IsR6(),
+             true,
+             GetAssembler());
+}
+
+// long java.lang.Long.reverse(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) {
+  GenReverse(invoke->GetLocations(),
+             Primitive::kPrimLong,
+             IsR2OrNewer(),
+             IsR6(),
+             true,
+             GetAssembler());
 }
 
 // boolean java.lang.String.equals(Object anObject)
@@ -463,10 +931,6 @@
 void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
 UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
 UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
 UNIMPLEMENTED_INTRINSIC(MathAbsInt)
@@ -519,12 +983,6 @@
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars)
 UNIMPLEMENTED_INTRINSIC(StringNewStringFromString)
-UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
-UNIMPLEMENTED_INTRINSIC(LongRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
-UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros)
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index c71b3c6..19ad525 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -67,6 +67,9 @@
 #undef INTRINSICS_LIST
 #undef OPTIMIZING_INTRINSICS
 
+  bool IsR2OrNewer(void);
+  bool IsR6(void);
+
  private:
   MipsAssembler* GetAssembler();
 
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index ecee11d..36e1b20 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -162,7 +162,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // int java.lang.Float.floatToRawIntBits(float)
@@ -171,7 +171,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -199,7 +199,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Float.intBitsToFloat(int)
@@ -208,7 +208,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -290,7 +290,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfLeadingZeros(long i)
@@ -299,7 +299,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenNumberOfTrailingZeroes(LocationSummary* locations,
@@ -327,7 +327,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), false, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // int java.lang.Long.numberOfTrailingZeros(long i)
@@ -336,7 +336,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeroes(invoke->GetLocations(), true, GetAssembler());
+  GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenRotateRight(HInvoke* invoke,
@@ -525,7 +525,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 // float java.lang.Math.abs(float)
@@ -534,7 +534,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -566,7 +566,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 // long java.lang.Math.abs(long)
@@ -575,7 +575,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -616,7 +616,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
 }
 
 // float java.lang.Math.min(float, float)
@@ -625,7 +625,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
 }
 
 // double java.lang.Math.max(double, double)
@@ -634,7 +634,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
 }
 
 // float java.lang.Math.max(float, float)
@@ -643,7 +643,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations,
@@ -713,7 +713,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // long java.lang.Math.min(long, long)
@@ -722,7 +722,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
 }
 
 // int java.lang.Math.max(int, int)
@@ -731,7 +731,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // long java.lang.Math.max(long, long)
@@ -740,7 +740,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
 }
 
 // double java.lang.Math.sqrt(double)
@@ -1045,7 +1045,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
@@ -1054,7 +1054,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
@@ -1063,7 +1063,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
@@ -1072,7 +1072,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
@@ -1081,7 +1081,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
@@ -1090,7 +1090,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1151,7 +1151,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x)
@@ -1160,7 +1164,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x)
@@ -1169,7 +1177,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObject(Object o, long offset, Object x)
@@ -1178,7 +1190,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x)
@@ -1187,7 +1203,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x)
@@ -1196,7 +1216,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLong(Object o, long offset, long x)
@@ -1205,7 +1229,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x)
@@ -1214,7 +1242,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
 }
 
 // void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
@@ -1223,7 +1255,11 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1565,7 +1601,7 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 // int java.lang.String.indexOf(int ch, int fromIndex)
@@ -1584,7 +1620,8 @@
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 // java.lang.String.String(byte[] bytes)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 371588f..5b67cde 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -138,31 +138,31 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, true);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, true);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  CreateFPToIntLocations(arena_, invoke, false);
+  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  CreateIntToFPLocations(arena_, invoke, false);
+  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -298,7 +298,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -306,7 +306,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
@@ -490,7 +490,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -498,7 +498,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -506,7 +506,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -514,7 +514,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler());
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -597,7 +597,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
@@ -605,7 +605,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -613,7 +613,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -621,7 +621,7 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -1265,19 +1265,20 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1660,42 +1661,42 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, true, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ true, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, false);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke, false, true);
+  CreateIntIntIntToIntLocations(arena_, invoke, /* is_long */ false, /* is_volatile */ true);
 }
 
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1722,31 +1723,40 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
 }
 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
+  CreateIntIntIntIntToVoidPlusTempsLocations(
+      arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
 }
 
 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
@@ -1798,31 +1808,31 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 2d9f01b..ecd129f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -115,10 +115,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -129,10 +129,10 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
-  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
-  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -230,7 +230,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -238,7 +238,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
+  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -277,7 +277,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
@@ -285,7 +285,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
-  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
 static void GenMinMaxFP(LocationSummary* locations,
@@ -388,7 +388,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
@@ -396,7 +397,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -404,7 +406,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -412,7 +415,8 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
+  GenMinMaxFP(
+      invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -461,7 +465,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
@@ -469,7 +473,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
@@ -477,7 +481,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
@@ -485,7 +489,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
-  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -690,7 +694,7 @@
   __ j(kUnordered, &nan);
 
   // output = double-to-long-truncate(input)
-  __ cvttsd2si(out, inPlusPointFive, true);
+  __ cvttsd2si(out, inPlusPointFive, /* is64bit */ true);
   __ jmp(&done);
   __ Bind(&nan);
 
@@ -1152,7 +1156,7 @@
                        temp2,
                        dest,
                        CpuRegister(kNoRegister),
-                       false);
+                       /* value_can_be_null */ false);
 
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1180,8 +1184,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo),
+                                  /* no_rip */ true));
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1372,7 +1376,7 @@
     // Ensure we have a start index >= 0;
     __ xorl(counter, counter);
     __ cmpl(start_index, Immediate(0));
-    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.
+    __ cmov(kGreater, counter, start_index, /* is64bit */ false);  // 32-bit copy is enough.
 
     // Move to the start of the string: string_obj + value_offset + 2 * start_index.
     __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
@@ -1409,19 +1413,20 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, true);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  CreateStringIndexOfLocations(invoke, arena_, false);
+  CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
-  GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+  GenerateStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -1446,8 +1451,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1466,8 +1471,8 @@
 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
   X86_64Assembler* assembler = GetAssembler();
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
@@ -1490,8 +1495,8 @@
   codegen_->AddSlowPath(slow_path);
   __ j(kEqual, slow_path->GetEntryLabel());
 
-  __ gs()->call(Address::Absolute(
-        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+  __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString),
+                                  /* no_rip */ true));
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1715,7 +1720,8 @@
 
 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
   CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
-  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(),
+                                                    /* no_rip */ true));
 }
 
 static void GenUnsafeGet(HInvoke* invoke,
@@ -1786,22 +1792,22 @@
 
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 
 
@@ -1885,31 +1891,31 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
 }
 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index c38bbe3..02befc0 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -121,6 +121,8 @@
           // phi in it.
           if (instruction->NeedsEnvironment()) {
             UpdateLoopPhisIn(instruction->GetEnvironment(), loop_info);
+          } else {
+            DCHECK(!instruction->HasEnvironment());
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
         } else if (instruction->CanThrow()) {
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 5b89cfe..389ada7 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -725,7 +725,7 @@
           // instruction is a store in the loop so the loop must does write.
           DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
 
-          if (loop_info->IsLoopInvariant(original_ref, false)) {
+          if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
             DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
             // Keep the store since its value may be needed at the loop header.
             possibly_redundant = false;
@@ -933,8 +933,9 @@
 };
 
 void LoadStoreElimination::Run() {
-  if (graph_->IsDebuggable()) {
+  if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
     // Debugger may set heap values or trigger deoptimization of callers.
+    // Try/catch support not implemented yet.
     // Skip this optimization.
     return;
   }
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 890598d..461be25 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -587,15 +587,8 @@
   return other.blocks_.IsBitSet(header_->GetBlockId());
 }
 
-bool HLoopInformation::IsLoopInvariant(HInstruction* instruction, bool must_dominate) const {
-  HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation();
-  if (other_loop != this && (other_loop == nullptr || !other_loop->IsIn(*this))) {
-    if (must_dominate) {
-      return instruction->GetBlock()->Dominates(GetHeader());
-    }
-    return true;
-  }
-  return false;
+bool HLoopInformation::IsDefinedOutOfTheLoop(HInstruction* instruction) const {
+  return !blocks_.IsBitSet(instruction->GetBlock()->GetBlockId());
 }
 
 size_t HLoopInformation::GetLifetimeEnd() const {
@@ -1177,6 +1170,59 @@
   }
 }
 
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+  DCHECK(!CanThrow());
+  DCHECK(!HasSideEffects());
+  DCHECK(!HasEnvironmentUses());
+  DCHECK(HasNonEnvironmentUses());
+  DCHECK(!IsPhi());  // Makes no sense for Phi.
+  DCHECK_EQ(InputCount(), 0u);
+
+  // Find the target block.
+  HUseIterator<HInstruction*> uses_it(GetUses());
+  HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+  uses_it.Advance();
+  while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+    uses_it.Advance();
+  }
+  if (!uses_it.Done()) {
+    // This instruction has uses in two or more blocks. Find the common dominator.
+    CommonDominator finder(target_block);
+    for (; !uses_it.Done(); uses_it.Advance()) {
+      finder.Update(uses_it.Current()->GetUser()->GetBlock());
+    }
+    target_block = finder.Get();
+    DCHECK(target_block != nullptr);
+  }
+  // Move to the first dominator not in a loop.
+  while (target_block->IsInLoop()) {
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position.
+  HInstruction* insert_pos = nullptr;
+  for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+    if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = uses_it2.Current()->GetUser();
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  MoveBefore(insert_pos);
+}
+
 HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
   DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
   DCHECK_EQ(cursor->GetBlock(), this);
@@ -1890,7 +1936,7 @@
  *             |
  *          if_block
  *           /    \
- *  dummy_block   deopt_block
+ *  true_block   false_block
  *           \    /
  *       new_pre_header
  *             |
@@ -1898,62 +1944,73 @@
  */
 void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
   DCHECK(header->IsLoopHeader());
-  HBasicBlock* pre_header = header->GetDominator();
+  HBasicBlock* old_pre_header = header->GetDominator();
 
-  // Need this to avoid critical edge.
+  // Need extra block to avoid critical edge.
   HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  // Need this to avoid critical edge.
-  HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc());
-  HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc());
   HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
   AddBlock(if_block);
-  AddBlock(dummy_block);
-  AddBlock(deopt_block);
+  AddBlock(true_block);
+  AddBlock(false_block);
   AddBlock(new_pre_header);
 
-  header->ReplacePredecessor(pre_header, new_pre_header);
-  pre_header->successors_.clear();
-  pre_header->dominated_blocks_.clear();
+  header->ReplacePredecessor(old_pre_header, new_pre_header);
+  old_pre_header->successors_.clear();
+  old_pre_header->dominated_blocks_.clear();
 
-  pre_header->AddSuccessor(if_block);
-  if_block->AddSuccessor(dummy_block);  // True successor
-  if_block->AddSuccessor(deopt_block);  // False successor
-  dummy_block->AddSuccessor(new_pre_header);
-  deopt_block->AddSuccessor(new_pre_header);
+  old_pre_header->AddSuccessor(if_block);
+  if_block->AddSuccessor(true_block);  // True successor
+  if_block->AddSuccessor(false_block);  // False successor
+  true_block->AddSuccessor(new_pre_header);
+  false_block->AddSuccessor(new_pre_header);
 
-  pre_header->dominated_blocks_.push_back(if_block);
-  if_block->SetDominator(pre_header);
-  if_block->dominated_blocks_.push_back(dummy_block);
-  dummy_block->SetDominator(if_block);
-  if_block->dominated_blocks_.push_back(deopt_block);
-  deopt_block->SetDominator(if_block);
+  old_pre_header->dominated_blocks_.push_back(if_block);
+  if_block->SetDominator(old_pre_header);
+  if_block->dominated_blocks_.push_back(true_block);
+  true_block->SetDominator(if_block);
+  if_block->dominated_blocks_.push_back(false_block);
+  false_block->SetDominator(if_block);
   if_block->dominated_blocks_.push_back(new_pre_header);
   new_pre_header->SetDominator(if_block);
   new_pre_header->dominated_blocks_.push_back(header);
   header->SetDominator(new_pre_header);
 
+  // Fix reverse post order.
   size_t index_of_header = IndexOfElement(reverse_post_order_, header);
   MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1);
   reverse_post_order_[index_of_header++] = if_block;
-  reverse_post_order_[index_of_header++] = dummy_block;
-  reverse_post_order_[index_of_header++] = deopt_block;
+  reverse_post_order_[index_of_header++] = true_block;
+  reverse_post_order_[index_of_header++] = false_block;
   reverse_post_order_[index_of_header++] = new_pre_header;
 
-  HLoopInformation* info = pre_header->GetLoopInformation();
-  if (info != nullptr) {
-    if_block->SetLoopInformation(info);
-    dummy_block->SetLoopInformation(info);
-    deopt_block->SetLoopInformation(info);
-    new_pre_header->SetLoopInformation(info);
-    for (HLoopInformationOutwardIterator loop_it(*pre_header);
+  // Fix loop information.
+  HLoopInformation* loop_info = old_pre_header->GetLoopInformation();
+  if (loop_info != nullptr) {
+    if_block->SetLoopInformation(loop_info);
+    true_block->SetLoopInformation(loop_info);
+    false_block->SetLoopInformation(loop_info);
+    new_pre_header->SetLoopInformation(loop_info);
+    // Add blocks to all enveloping loops.
+    for (HLoopInformationOutwardIterator loop_it(*old_pre_header);
          !loop_it.Done();
          loop_it.Advance()) {
       loop_it.Current()->Add(if_block);
-      loop_it.Current()->Add(dummy_block);
-      loop_it.Current()->Add(deopt_block);
+      loop_it.Current()->Add(true_block);
+      loop_it.Current()->Add(false_block);
       loop_it.Current()->Add(new_pre_header);
     }
   }
+
+  // Fix try/catch information.
+  TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock()
+      ? old_pre_header->GetTryCatchInformation()
+      : nullptr;
+  if_block->SetTryCatchInformation(try_catch_info);
+  true_block->SetTryCatchInformation(try_catch_info);
+  false_block->SetTryCatchInformation(try_catch_info);
+  new_pre_header->SetTryCatchInformation(try_catch_info);
 }
 
 void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
@@ -2068,6 +2125,26 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+  switch (rhs) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+      return os << "string_init";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+      return os << "recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+      return os << "direct";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+      return os << "direct_fixup";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      return os << "dex_cache_pc_relative";
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      return os << "dex_cache_via_method";
+    default:
+      LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
@@ -2077,7 +2154,8 @@
     case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
       return os << "none";
     default:
-      return os << "unknown:" << static_cast<int>(rhs);
+      LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d3f30cb..3e38e9f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -371,6 +371,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  ArtMethod* GetArtMethod() const { return art_method_; }
+  void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+
   // Returns an instruction with the opposite boolean value from 'cond'.
   // The instruction has been inserted into the graph, either as a constant, or
   // before cursor.
@@ -479,6 +482,11 @@
 
   HCurrentMethod* cached_current_method_;
 
+  // The ArtMethod this graph is for. Note that for AOT, it may be null,
+  // for example for methods whose declaring class could not be resolved
+  // (such as when the superclass could not be found).
+  ArtMethod* art_method_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -556,11 +564,8 @@
   // Note that `other` *must* be populated before entering this function.
   bool IsIn(const HLoopInformation& other) const;
 
-  // Returns true if instruction is not defined within this loop or any loop nested inside
-  // this loop. If must_dominate is set, only definitions that actually dominate the loop
-  // header can be invariant. Otherwise, any definition outside the loop, including
-  // definitions that appear after the loop, is invariant.
-  bool IsLoopInvariant(HInstruction* instruction, bool must_dominate) const;
+  // Returns true if instruction is not defined within this loop.
+  bool IsDefinedOutOfTheLoop(HInstruction* instruction) const;
 
   const ArenaBitVector& GetBlocks() const { return blocks_; }
 
@@ -1095,12 +1100,18 @@
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
 
+#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
+  M(ArmDexCacheArraysBase, Instruction)
+#endif
 
 #ifndef ART_ENABLE_CODEGEN_arm64
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
+  M(Arm64DataProcWithShifterOp, Instruction)                            \
   M(Arm64IntermediateAddress, Instruction)                              \
   M(Arm64MultiplyAccumulate, Instruction)
 #endif
@@ -1951,6 +1962,14 @@
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
+  // Move `this` before its first user and out of any loops. If there is no
+  // out-of-loop user that dominates all other users, move the instruction
+  // to the end of the out-of-loop common dominator of the user's blocks.
+  //
+  // This can be used only on non-throwing instructions with no side effects that
+  // have at least one use but no environment uses.
+  void MoveBeforeFirstUserAndOutOfLoops();
+
 #define INSTRUCTION_TYPE_CHECK(type, super)                                    \
   bool Is##type() const { return (As##type() != nullptr); }                    \
   virtual const H##type* As##type() const { return nullptr; }                  \
@@ -2448,11 +2467,15 @@
 // Deoptimize to interpreter, upon checking a condition.
 class HDeoptimize : public HTemplateInstruction<1> {
  public:
-  explicit HDeoptimize(HInstruction* cond, uint32_t dex_pc)
+  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::None(), dex_pc) {
     SetRawInputAt(0, cond);
   }
 
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+    return true;
+  }
   bool NeedsEnvironment() const OVERRIDE { return true; }
   bool CanThrow() const OVERRIDE { return true; }
 
@@ -3417,7 +3440,7 @@
                         MethodReference target_method,
                         DispatchInfo dispatch_info,
                         InvokeType original_invoke_type,
-                        InvokeType invoke_type,
+                        InvokeType optimized_invoke_type,
                         ClinitCheckRequirement clinit_check_requirement)
       : HInvoke(arena,
                 number_of_arguments,
@@ -3431,7 +3454,7 @@
                 dex_pc,
                 method_index,
                 original_invoke_type),
-        invoke_type_(invoke_type),
+        optimized_invoke_type_(optimized_invoke_type),
         clinit_check_requirement_(clinit_check_requirement),
         target_method_(target_method),
         dispatch_info_(dispatch_info) { }
@@ -3477,7 +3500,11 @@
   // platform-specific special input, such as PC-relative addressing base.
   uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
 
-  InvokeType GetInvokeType() const { return invoke_type_; }
+  InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+  void SetOptimizedInvokeType(InvokeType invoke_type) {
+    optimized_invoke_type_ = invoke_type;
+  }
+
   MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
   CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
   bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
@@ -3500,6 +3527,7 @@
   }
   bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
   MethodReference GetTargetMethod() const { return target_method_; }
+  void SetTargetMethod(MethodReference method) { target_method_ = method; }
 
   int32_t GetStringInitOffset() const {
     DCHECK(IsStringInit());
@@ -3525,7 +3553,7 @@
 
   // Is this instruction a call to a static method?
   bool IsStatic() const {
-    return GetInvokeType() == kStatic;
+    return GetOriginalInvokeType() == kStatic;
   }
 
   // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
@@ -3598,7 +3626,7 @@
   void RemoveInputAt(size_t index);
 
  private:
-  const InvokeType invoke_type_;
+  InvokeType optimized_invoke_type_;
   ClinitCheckRequirement clinit_check_requirement_;
   // The target method may refer to different dex file or method index than the original
   // invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3608,6 +3636,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
 };
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
 
 class HInvokeVirtual : public HInvoke {
@@ -5565,6 +5594,9 @@
 
 }  // namespace art
 
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "nodes_arm64.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000..6a1dbb9
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+        dex_file_(&dex_file),
+        element_offset_(static_cast<size_t>(-1)) { }
+
+  void UpdateElementOffset(size_t element_offset) {
+    // Use the lowest offset from the requested elements so that all offsets from
+    // this base are non-negative because our assemblers emit negative-offset loads
+    // as a sequence of two or more instructions. (However, positive offsets beyond
+    // 4KiB also require two or more instructions, so this simple heuristic could
+    // be improved for cases where there is a dense cluster of elements far from
+    // the lowest offset. This is expected to be rare enough though, so we choose
+    // not to spend compile time on elaborate calculations.)
+    element_offset_ = std::min(element_offset_, element_offset);
+  }
+
+  const DexFile& GetDexFile() const {
+    return *dex_file_;
+  }
+
+  size_t GetElementOffset() const {
+    return element_offset_;
+  }
+
+  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+  const DexFile* dex_file_;
+  size_t element_offset_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc
new file mode 100644
index 0000000..ac2f093
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_arm64.h"
+#include "nodes.h"
+
+namespace art {
+
+using arm64::helpers::CanFitInShifterOperand;
+
+void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+                                                           /*out*/OpKind* op_kind,
+                                                           /*out*/int* shift_amount) {
+  DCHECK(CanFitInShifterOperand(instruction));
+  if (instruction->IsShl()) {
+    *op_kind = kLSL;
+    *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsShr()) {
+    *op_kind = kASR;
+    *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue();
+  } else if (instruction->IsUShr()) {
+    *op_kind = kLSR;
+    *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instruction->IsTypeConversion());
+    Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType();
+    Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType();
+    int result_size = Primitive::ComponentSize(result_type);
+    int input_size = Primitive::ComponentSize(input_type);
+    int min_size = std::min(result_size, input_size);
+    // This follows the logic in
+    // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
+    if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+      // There is actually nothing to do. The register will be used as a W
+      // register, discarding the top bits. This is represented by the default
+      // encoding 'LSL 0'.
+      *op_kind = kLSL;
+      *shift_amount = 0;
+    } else if (result_type == Primitive::kPrimChar ||
+               (input_type == Primitive::kPrimChar && input_size < result_size)) {
+      *op_kind = kUXTH;
+    } else {
+      switch (min_size) {
+        case 1: *op_kind = kSXTB; break;
+        case 2: *op_kind = kSXTH; break;
+        case 4: *op_kind = kSXTW; break;
+        default:
+          LOG(FATAL) << "Unexpected min size " << min_size;
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+  switch (op) {
+    case HArm64DataProcWithShifterOp::kLSL:  return os << "LSL";
+    case HArm64DataProcWithShifterOp::kLSR:  return os << "LSR";
+    case HArm64DataProcWithShifterOp::kASR:  return os << "ASR";
+    case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
+    case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
+    case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
+    case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
+    case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
+    case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+    default:
+      LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index d07f019..e843935 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -19,6 +19,79 @@
 
 namespace art {
 
+class HArm64DataProcWithShifterOp : public HExpression<2> {
+ public:
+  enum OpKind {
+    kLSL,   // Logical shift left.
+    kLSR,   // Logical shift right.
+    kASR,   // Arithmetic shift right.
+    kUXTB,  // Unsigned extend byte.
+    kUXTH,  // Unsigned extend half-word.
+    kUXTW,  // Unsigned extend word.
+    kSXTB,  // Signed extend byte.
+    kSXTH,  // Signed extend half-word.
+    kSXTW,  // Signed extend word.
+
+    // Aliases.
+    kFirstShiftOp = kLSL,
+    kLastShiftOp = kASR,
+    kFirstExtensionOp = kUXTB,
+    kLastExtensionOp = kSXTW
+  };
+  HArm64DataProcWithShifterOp(HInstruction* instr,
+                              HInstruction* left,
+                              HInstruction* right,
+                              OpKind op,
+                              // The shift argument is unused if the operation
+                              // is an extension.
+                              int shift = 0,
+                              uint32_t dex_pc = kNoDexPc)
+      : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+        instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
+    DCHECK(!instr->HasSideEffects());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
+    HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+    return instr_kind_ == other->instr_kind_ &&
+        op_kind_ == other->op_kind_ &&
+        shift_amount_ == other->shift_amount_;
+  }
+
+  static bool IsShiftOp(OpKind op_kind) {
+    return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+  }
+
+  static bool IsExtensionOp(OpKind op_kind) {
+    return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+  }
+
+  // Find the operation kind and shift amount from a bitfield move instruction.
+  static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+                                       /*out*/OpKind* op_kind,
+                                       /*out*/int* shift_amount);
+
+  InstructionKind GetInstrKind() const { return instr_kind_; }
+  OpKind GetOpKind() const { return op_kind_; }
+  int GetShiftAmount() const { return shift_amount_; }
+
+  DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
+
+ private:
+  InstructionKind instr_kind_;
+  OpKind op_kind_;
+  int shift_amount_;
+
+  friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
+
 // This instruction computes an intermediate address pointing in the 'middle' of an object. The
 // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
 // never used across anything that can trigger GC.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index dec08d8..4643aed 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,6 +20,10 @@
 #include <stdint.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm64
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
@@ -391,10 +395,11 @@
       || instruction_set == kX86_64;
 }
 
-// Read barrier are supported only on ARM, x86 and x86-64 at the moment.
+// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment.
 // TODO: Add support for other architectures and remove this function
 static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
-  return instruction_set == kThumb2
+  return instruction_set == kArm64
+      || instruction_set == kThumb2
       || instruction_set == kX86
       || instruction_set == kX86_64;
 }
@@ -422,7 +427,7 @@
     return;
   }
   HInliner* inliner = new (graph->GetArena()) HInliner(
-    graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+      graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -434,6 +439,17 @@
                                  PassObserver* pass_observer) {
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+    case kThumb2:
+    case kArm: {
+      arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+      HOptimization* arm_optimizations[] = {
+        fixups
+      };
+      RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+      break;
+    }
+#endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64: {
       arm64::InstructionSimplifierArm64* simplifier =
@@ -499,12 +515,13 @@
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
   HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
   SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
   GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
   LICM* licm = new (arena) LICM(graph, *side_effects);
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
   ReferenceTypePropagation* type_propagation =
       new (arena) ReferenceTypePropagation(graph, &handles);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
@@ -514,7 +531,6 @@
       graph, stats, "instruction_simplifier_after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier_before_codegen");
-
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations1[] = {
@@ -526,48 +542,30 @@
     dce1,
     simplify2
   };
-
   RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
 
   MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
 
-  // TODO: Update passes incompatible with try/catch so we have the same
-  //       pipeline for all methods.
-  if (graph->HasTryCatch()) {
-    HOptimization* optimizations2[] = {
-      boolean_simplify,
-      side_effects,
-      gvn,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  } else {
-    HOptimization* optimizations2[] = {
-      // BooleanSimplifier depends on the InstructionSimplifier removing
-      // redundant suspend checks to recognize empty blocks.
-      boolean_simplify,
-      fold2,  // TODO: if we don't inline we can also skip fold2.
-      side_effects,
-      gvn,
-      licm,
-      induction,
-      bce,
-      simplify3,
-      lse,
-      dce2,
-      // The codegen has a few assumptions that only the instruction simplifier
-      // can satisfy. For example, the code generator does not expect to see a
-      // HTypeConversion from a type to the same type.
-      simplify4,
-    };
-
-    RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
-  }
+  HOptimization* optimizations2[] = {
+    // BooleanSimplifier depends on the InstructionSimplifier removing
+    // redundant suspend checks to recognize empty blocks.
+    boolean_simplify,
+    fold2,  // TODO: if we don't inline we can also skip fold2.
+    side_effects,
+    gvn,
+    licm,
+    induction,
+    bce,
+    fold3,  // evaluates code generated by dynamic bce
+    simplify3,
+    lse,
+    dce2,
+    // The codegen has a few assumptions that only the instruction simplifier
+    // can satisfy. For example, the code generator does not expect to see a
+    // HTypeConversion from a type to the same type.
+    simplify4,
+  };
+  RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
   RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
   AllocateRegisters(graph, codegen, pass_observer);
@@ -606,8 +604,6 @@
   stack_map.resize(codegen->ComputeStackMapsSize());
   codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
-
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -642,7 +638,6 @@
   ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
   codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
 
-  MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
@@ -748,8 +743,8 @@
     ArtMethod* art_method = compiler_driver->ResolveMethod(
         soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type);
     // We may not get a method, for example if its class is erroneous.
-    // TODO: Clean this up, the compiler driver should just pass the ArtMethod to compile.
     if (art_method != nullptr) {
+      graph->SetArtMethod(art_method);
       interpreter_metadata = art_method->GetQuickenedInfo();
     }
   }
@@ -846,6 +841,7 @@
                    dex_file,
                    dex_cache));
     if (codegen.get() != nullptr) {
+      MaybeRecordStat(MethodCompilationStat::kCompiled);
       if (run_optimizations_) {
         method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
       } else {
@@ -856,7 +852,7 @@
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
       MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
     } else {
-      MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
     }
   }
 
@@ -932,6 +928,7 @@
   if (stack_map_data == nullptr) {
     return false;
   }
+  MaybeRecordStat(MethodCompilationStat::kCompiled);
   codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
   const void* code = code_cache->CommitCode(
       self,
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1..6296eed 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
 
-#include <sstream>
+#include <iomanip>
 #include <string>
 #include <type_traits>
 
@@ -27,18 +27,18 @@
 
 enum MethodCompilationStat {
   kAttemptCompilation = 0,
-  kCompiledBaseline,
-  kCompiledOptimized,
+  kCompiled,
   kInlinedInvoke,
   kInstructionSimplifications,
   kInstructionSimplificationsArch,
   kUnresolvedMethod,
   kUnresolvedField,
   kUnresolvedFieldNotAFastAccess,
+  kRemovedCheckedCast,
+  kRemovedDeadInstruction,
+  kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
   kNotCompiledCannotBuildSSA,
-  kNotCompiledCantAccesType,
-  kNotCompiledClassNotVerified,
   kNotCompiledHugeMethod,
   kNotCompiledLargeMethodNoBranches,
   kNotCompiledMalformedOpcode,
@@ -47,13 +47,12 @@
   kNotCompiledSpaceFilter,
   kNotCompiledUnhandledInstruction,
   kNotCompiledUnsupportedIsa,
+  kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
-  kNotOptimizedDisabled,
-  kNotOptimizedRegisterAllocator,
-  kNotOptimizedTryCatch,
-  kRemovedCheckedCast,
-  kRemovedDeadInstruction,
-  kRemovedNullCheck,
+  kInlinedMonomorphicCall,
+  kMonomorphicCall,
+  kPolymorphicCall,
+  kMegamorphicCall,
   kLastStat
 };
 
@@ -66,20 +65,19 @@
   }
 
   void Log() const {
+    if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+      // Log only in debug builds or if the compiler is verbose.
+      return;
+    }
+
     if (compile_stats_[kAttemptCompilation] == 0) {
       LOG(INFO) << "Did not compile any method.";
     } else {
-      size_t unoptimized_percent =
-          compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
-      size_t optimized_percent =
-          compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
-      std::ostringstream oss;
-      oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
-      oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
-      oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
-      LOG(INFO) << oss.str();
+      float compiled_percent =
+          compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+      LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+          << " methods: " << std::fixed << std::setprecision(2)
+          << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
 
       for (int i = 0; i < kLastStat; i++) {
         if (compile_stats_[i] != 0) {
@@ -92,41 +90,42 @@
 
  private:
   std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+    std::string name;
     switch (stat) {
-      case kAttemptCompilation : return "kAttemptCompilation";
-      case kCompiledBaseline : return "kCompiledBaseline";
-      case kCompiledOptimized : return "kCompiledOptimized";
-      case kInlinedInvoke : return "kInlinedInvoke";
-      case kInstructionSimplifications: return "kInstructionSimplifications";
-      case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
-      case kUnresolvedMethod : return "kUnresolvedMethod";
-      case kUnresolvedField : return "kUnresolvedField";
-      case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
-      case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
-      case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
-      case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
-      case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
-      case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
-      case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
-      case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
-      case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
-      case kNotCompiledPathological : return "kNotCompiledPathological";
-      case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
-      case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
-      case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
-      case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
-      case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
-      case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
-      case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
-      case kRemovedCheckedCast: return "kRemovedCheckedCast";
-      case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
-      case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kAttemptCompilation : name = "AttemptCompilation"; break;
+      case kCompiled : name = "Compiled"; break;
+      case kInlinedInvoke : name = "InlinedInvoke"; break;
+      case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+      case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+      case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+      case kUnresolvedField : name = "UnresolvedField"; break;
+      case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+      case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+      case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+      case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+      case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+      case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+      case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+      case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+      case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+      case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+      case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+      case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+      case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+      case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+      case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+      case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+      case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kMonomorphicCall: name = "MonomorphicCall"; break;
+      case kPolymorphicCall: name = "PolymorphicCall"; break;
+      case kMegamorphicCall: name = "kMegamorphicCall"; break;
 
-      case kLastStat: break;  // Invalid to print out.
+      case kLastStat:
+        LOG(FATAL) << "invalid stat "
+            << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+        UNREACHABLE();
     }
-    LOG(FATAL) << "invalid stat "
-        << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
-    UNREACHABLE();
+    return "OptStat#" + name;
   }
 
   AtomicInteger compile_stats_[kLastStat];
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 808a1dc..a385448 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -26,6 +27,15 @@
  public:
   explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
 
+  void MoveBaseIfNeeded() {
+    if (base_ != nullptr) {
+      // Bring the base closer to the first use (previously, it was in the
+      // entry block) and relieve some pressure on the register allocator
+      // while avoiding recalculation of the base in a loop.
+      base_->MoveBeforeFirstUserAndOutOfLoops();
+    }
+  }
+
  private:
   void VisitAdd(HAdd* add) OVERRIDE {
     BinaryFP(add);
@@ -70,9 +80,13 @@
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer(switch_insn);
+    InitializePCRelativeBasePointer();
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,22 +98,22 @@
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }
 
-  void InitializePCRelativeBasePointer(HInstruction* user) {
+  void InitializePCRelativeBasePointer() {
     // Ensure we only initialize the pointer once.
     if (base_ != nullptr) {
       return;
     }
 
-    HGraph* graph = GetGraph();
-    HBasicBlock* entry = graph->GetEntryBlock();
-    base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
-    HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
-    entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block, move it to a better
+    // position later in MoveBaseIfNeeded().
+    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
     DCHECK(base_ != nullptr);
   }
 
   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer(insn);
+    InitializePCRelativeBasePointer();
     HX86LoadFromConstantTable* load_constant =
         new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
     insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
@@ -111,7 +125,7 @@
     // addressing, we need the PC-relative address base.
     HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
     if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
-      InitializePCRelativeBasePointer(invoke);
+      InitializePCRelativeBasePointer();
       // Add the extra parameter base_.
       DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
       invoke_static_or_direct->AddSpecialInput(base_);
@@ -133,6 +147,7 @@
 void PcRelativeFixups::Run() {
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
+  visitor.MoveBaseIfNeeded();
 }
 
 }  // namespace x86
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 0d05c49..fea903d 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -127,6 +127,87 @@
   }
 }
 
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+  ArenaAllocatorAdapter<void> adapter =
+      root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+  ArenaVector<HPhi*> visited_phis(adapter);
+  ArenaVector<HInstruction*> worklist(adapter);
+  worklist.push_back(root_instr);
+
+  while (!worklist.empty()) {
+    HInstruction* instr = worklist.back();
+    worklist.pop_back();
+
+    if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+      // Expect that both `root_instr` and its inputs have invalid RTI.
+      ScopedObjectAccess soa(Thread::Current());
+      DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+      // Insert all unvisited inputs to the worklist.
+      for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+        HInstruction* input = it.Current();
+        if (input->IsPhi()) {
+          if (ContainsElement(visited_phis, input->AsPhi())) {
+            continue;
+          } else {
+            visited_phis.push_back(input->AsPhi());
+          }
+        }
+        worklist.push_back(input);
+      }
+    } else if (instr->IsNullConstant()) {
+      // The only input of `root_instr` allowed to have valid RTI because it is ignored.
+    } else {
+      LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+          << instr->GetReferenceTypeInfo();
+      UNREACHABLE();
+    }
+  }
+}
+
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+  ScopedObjectAccess soa(Thread::Current());
+  for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+    for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+    for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+        fn(instr);
+      }
+    }
+  }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fix-point iteration will leave kPrimNot instructions with
+  // invalid RTI because bytecode does not provide enough typing information.
+  // Set the RTI of such instructions to Object.
+  // Example:
+  //   MyClass a = null, b = null;
+  //   while (a == null) {
+  //     if (cond) { a = b; } else { b = a; }
+  //   }
+
+  if (kIsDebugBuild) {
+    // Test that if we are going to set RTI from invalid to Object, that
+    // instruction did not have any typed instructions in its def-use chain
+    // and therefore its type could not be inferred.
+    ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+  }
+
+  ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+  ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+    instr->SetReferenceTypeInfo(obj_rti);
+  });
+}
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -136,6 +217,7 @@
   }
 
   ProcessWorklist();
+  SetUntypedInstructionsToObject();
   ValidateTypes();
 }
 
@@ -387,7 +469,7 @@
       // but then we would need to pass it to RTPVisitor just for this debug check. Since
       // the method is from the String class, the null loader is good enough.
       Handle<mirror::ClassLoader> loader;
-      ArtMethod* method = cl->ResolveMethod(
+      ArtMethod* method = cl->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           invoke->GetDexFile(), invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect);
       DCHECK(method != nullptr);
       mirror::Class* declaring_class = method->GetDeclaringClass();
@@ -534,8 +616,9 @@
 void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
   ScopedObjectAccess soa(Thread::Current());
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
-  instr->SetReferenceTypeInfo(parent_rti);
+  if (parent_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(parent_rti);
+  }
 }
 
 void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -588,11 +671,16 @@
   }
 
   if (phi->GetBlock()->IsLoopHeader()) {
+    ScopedObjectAccess soa(Thread::Current());
     // Set the initial type for the phi. Use the non back edge input for reaching
     // a fixed point faster.
+    HInstruction* first_input = phi->InputAt(0);
+    ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+    if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+      phi->SetCanBeNull(first_input->CanBeNull());
+      phi->SetReferenceTypeInfo(first_input_rti);
+    }
     AddToWorklist(phi);
-    phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
-    phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
   } else {
     // Eagerly compute the type of the phi, for quicker convergence. Note
     // that we don't need to add users to the worklist because we are
@@ -653,7 +741,9 @@
   DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
 
   ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
-  DCHECK(parent_rti.IsValid());
+  if (!parent_rti.IsValid()) {
+    return;
+  }
 
   Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
   if (handle->IsObjectArrayClass()) {
@@ -665,8 +755,6 @@
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
   }
-
-  return;
 }
 
 bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -683,7 +771,7 @@
       instr->SetReferenceTypeInfo(parent_rti);
     }
   } else if (instr->IsArrayGet()) {
-    // TODO: consider if it's worth "looking back" and bounding the input object
+    // TODO: consider if it's worth "looking back" and binding the input object
     // to an array type.
     UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
   } else {
@@ -711,6 +799,7 @@
   if (instr->GetType() != Primitive::kPrimNot) {
     return;
   }
+
   ScopedObjectAccess soa(Thread::Current());
   UpdateArrayGet(instr, handles_, object_class_handle_);
   if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -770,7 +859,10 @@
       }
     }
   }
-  instr->SetReferenceTypeInfo(new_rti);
+
+  if (new_rti.IsValid()) {
+    instr->SetReferenceTypeInfo(new_rti);
+  }
 }
 
 // Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5c05592..21789e1 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -57,6 +57,7 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ValidateTypes();
+  void SetUntypedInstructionsToObject();
 
   StackHandleScopeCollection* handles_;
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079..5e1d1d9 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@
   }
 
   // TODO: Avoid CompilerDriver.
-  InvokeType invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+  InvokeType optimized_invoke_type = original_invoke_type;
   MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
   int vtable_idx;
   uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@
       invoke->GetDexPc(),
       false /* update_stats: already updated in builder */,
       true /* enable_devirtualization */,
-      &invoke_type,
+      &optimized_invoke_type,
       &target_method,
       &vtable_idx,
       &direct_code,
       &direct_method);
-  DCHECK(success);
-  DCHECK_EQ(invoke_type, invoke->GetInvokeType());
-  DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
-  DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+  if (!success) {
+    // TODO: try using kDexCachePcRelative. It's always a valid method load
+    // kind as long as it's supported by the codegen
+    return;
+  }
+  invoke->SetOptimizedInvokeType(optimized_invoke_type);
+  invoke->SetTargetMethod(target_method);
 
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 39e5259..48465e6 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -57,7 +57,7 @@
       __ LoadFromOffset(kLoadWord, PC, R0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (R0).
-      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, IP, R0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, PC, IP, offset.Int32Value());
       break;
     case kQuickAbi:  // R9 holds Thread*.
@@ -91,7 +91,7 @@
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (X0).
       __ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1),
                       Arm64ManagedRegister::FromXRegister(X0),
-                      Offset(JNIEnvExt::SelfOffset().Int32Value()));
+                      Offset(JNIEnvExt::SelfOffset(8).Int32Value()));
 
       __ JumpTo(Arm64ManagedRegister::FromXRegister(IP1), Offset(offset.Int32Value()),
                 Arm64ManagedRegister::FromXRegister(IP0));
@@ -126,7 +126,7 @@
       __ LoadFromOffset(kLoadWord, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadWord, T9, A0, JNIEnvExt::SelfOffset(4).Int32Value());
       __ LoadFromOffset(kLoadWord, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // S1 holds Thread*.
@@ -158,7 +158,7 @@
       __ LoadFromOffset(kLoadDoubleword, T9, A0, offset.Int32Value());
       break;
     case kJniAbi:  // Load via Thread* held in JNIEnv* in first argument (A0).
-      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset().Int32Value());
+      __ LoadFromOffset(kLoadDoubleword, T9, A0, JNIEnvExt::SelfOffset(8).Int32Value());
       __ LoadFromOffset(kLoadDoubleword, T9, T9, offset.Int32Value());
       break;
     case kQuickAbi:  // Fall-through.
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 4a6e6d7..98a1a8f 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -22,6 +22,7 @@
 
 #include "base/bit_utils.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/managed_register_arm.h"
@@ -697,10 +698,9 @@
   // Most of these are pure virtual as they need to be implemented per instruction set.
 
   // Create a new literal with a given value.
-  // NOTE: Force the template parameter to be explicitly specified. In the absence of
-  // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+  // NOTE: Force the template parameter to be explicitly specified.
   template <typename T>
-  Literal* NewLiteral(typename std::decay<T>::type value) {
+  Literal* NewLiteral(typename Identity<T>::type value) {
     static_assert(std::is_integral<T>::value, "T must be an integral type.");
     return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
   }
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 7ad5b44..cdeb443 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2569,30 +2569,19 @@
 
 void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
   CheckCondition(cond);
-  bool must_be_32bit = force_32bit_;
-  if (IsHighRegister(rd)|| imm16 >= 256u) {
-    must_be_32bit = true;
-  }
-
-  if (must_be_32bit) {
-    // Use encoding T3.
-    uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
-    uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
-    uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
-    uint32_t imm8 = imm16 & 0xff;
-    int32_t encoding = B31 | B30 | B29 | B28 |
-                    B25 | B22 |
-                    static_cast<uint32_t>(rd) << 8 |
-                    i << 26 |
-                    imm4 << 16 |
-                    imm3 << 12 |
-                    imm8;
-    Emit32(encoding);
-  } else {
-    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
-                imm16;
-    Emit16(encoding);
-  }
+  // Always 32 bits, encoding T3. (Other encondings are called MOV, not MOVW.)
+  uint32_t imm4 = (imm16 >> 12) & 15U /* 0b1111 */;
+  uint32_t i = (imm16 >> 11) & 1U /* 0b1 */;
+  uint32_t imm3 = (imm16 >> 8) & 7U /* 0b111 */;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
 }
 
 
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 1de51a2..5ae2cc2 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -832,11 +832,12 @@
 TEST(Thumb2AssemblerTest, MovWMovT) {
   arm::Thumb2Assembler assembler;
 
-  __ movw(R4, 0);         // 16 bit.
-  __ movw(R4, 0x34);      // 16 bit.
-  __ movw(R9, 0x34);      // 32 bit due to high register.
-  __ movw(R3, 0x1234);    // 32 bit due to large value.
-  __ movw(R9, 0xffff);    // 32 bit due to large value and high register.
+  // Always 32 bit.
+  __ movw(R4, 0);
+  __ movw(R4, 0x34);
+  __ movw(R9, 0x34);
+  __ movw(R3, 0x1234);
+  __ movw(R9, 0xffff);
 
   // Always 32 bit.
   __ movt(R0, 0);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 9246c82..886295e 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -439,14 +439,14 @@
   nullptr
 };
 const char* MovWMovTResults[] = {
-  "   0:	2400      	movs	r4, #0\n",
-  "   2:	2434      	movs	r4, #52	; 0x34\n",
-  "   4:	f240 0934 	movw	r9, #52	; 0x34\n",
-  "   8:	f241 2334 	movw	r3, #4660	; 0x1234\n",
-  "   c:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
-  "  10:	f2c0 0000 	movt	r0, #0\n",
-  "  14:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
-  "  18:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
+  "   0:	f240 0400 	movw  r4, #0\n",
+  "   4:	f240 0434 	movw  r4, #52 ; 0x34\n",
+  "   8:	f240 0934 	movw	r9, #52	; 0x34\n",
+  "   c:	f241 2334 	movw	r3, #4660	; 0x1234\n",
+  "  10:	f64f 79ff 	movw	r9, #65535	; 0xffff\n",
+  "  14:	f2c0 0000 	movt	r0, #0\n",
+  "  18:	f2c1 2034 	movt	r0, #4660	; 0x1234\n",
+  "  1c:	f6cf 71ff 	movt	r1, #65535	; 0xffff\n",
   nullptr
 };
 const char* SpecialAddSubResults[] = {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 73f3fa8..733ad2c 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -307,6 +307,46 @@
   EmitR(0, rs, rt, rd, 0, 0x27);
 }
 
+void MipsAssembler::Movz(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0A);
+}
+
+void MipsAssembler::Movn(Register rd, Register rs, Register rt) {
+  CHECK(!IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x0B);
+}
+
+void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x35);
+}
+
+void MipsAssembler::Selnez(Register rd, Register rs, Register rt) {
+  CHECK(IsR6());
+  EmitR(0, rs, rt, rd, 0, 0x37);
+}
+
+void MipsAssembler::ClzR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10);
+}
+
+void MipsAssembler::ClzR2(Register rd, Register rs) {
+  CHECK(!IsR6());
+  EmitR(0x1C, rs, rd, rd, 0, 0x20);
+}
+
+void MipsAssembler::CloR6(Register rd, Register rs) {
+  CHECK(IsR6());
+  EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11);
+}
+
+void MipsAssembler::CloR2(Register rd, Register rs) {
+  CHECK(!IsR6());
+  EmitR(0x1C, rs, rd, rd, 0, 0x21);
+}
+
 void MipsAssembler::Seb(Register rd, Register rt) {
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20);
 }
@@ -319,6 +359,11 @@
   EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20);
 }
 
+void MipsAssembler::Bitswap(Register rd, Register rt) {
+  CHECK(IsR6());
+  EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20);
+}
+
 void MipsAssembler::Sll(Register rd, Register rt, int shamt) {
   CHECK(IsUint<5>(shamt)) << shamt;
   EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00);
@@ -347,6 +392,10 @@
   EmitR(0, rs, rt, rd, 0, 0x06);
 }
 
+void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) {
+  EmitR(0, rs, rt, rd, 1, 0x06);
+}
+
 void MipsAssembler::Srav(Register rd, Register rt, Register rs) {
   EmitR(0, rs, rt, rd, 0, 0x07);
 }
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 013ad60..62366f6 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -134,9 +134,19 @@
   void Xori(Register rt, Register rs, uint16_t imm16);
   void Nor(Register rd, Register rs, Register rt);
 
+  void Movz(Register rd, Register rs, Register rt);  // R2
+  void Movn(Register rd, Register rs, Register rt);  // R2
+  void Seleqz(Register rd, Register rs, Register rt);  // R6
+  void Selnez(Register rd, Register rs, Register rt);  // R6
+  void ClzR6(Register rd, Register rs);
+  void ClzR2(Register rd, Register rs);
+  void CloR6(Register rd, Register rs);
+  void CloR2(Register rd, Register rs);
+
   void Seb(Register rd, Register rt);  // R2+
   void Seh(Register rd, Register rt);  // R2+
   void Wsbh(Register rd, Register rt);  // R2+
+  void Bitswap(Register rd, Register rt);  // R6
 
   void Sll(Register rd, Register rt, int shamt);
   void Srl(Register rd, Register rt, int shamt);
@@ -144,6 +154,7 @@
   void Sra(Register rd, Register rt, int shamt);
   void Sllv(Register rd, Register rt, Register rs);
   void Srlv(Register rd, Register rt, Register rs);
+  void Rotrv(Register rd, Register rt, Register rs);  // R2+
   void Srav(Register rd, Register rt, Register rs);
 
   void Lb(Register rt, Register rs, uint16_t imm16);
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index e252765..f10acf9 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -38,9 +38,10 @@
   dex2oat_target_arch := 32
 endif
 
-# We need to explcitly give the arch, as giving 'both' will make the
-# build-art-executable rule compile dex2oat for 64bits.
 ifeq ($(HOST_PREFER_32_BIT),true)
+  # We need to explicitly restrict the host arch to 32-bit only, as
+  # giving 'both' would make build-art-executable generate a build
+  # rule for a 64-bit dex2oat executable too.
   dex2oat_host_arch := 32
 else
   dex2oat_host_arch := both
@@ -71,4 +72,8 @@
   endif
 endif
 
+# Clear locals now they've served their purpose.
+dex2oat_target_arch :=
+dex2oat_host_arch :=
+
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 89c2a7c..2aa4085 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -55,8 +55,10 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "dwarf/method_debug_info.h"
 #include "elf_file.h"
 #include "elf_writer.h"
+#include "elf_writer_quick.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -72,7 +74,6 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change.h"
 #include "utils.h"
-#include "vector_output_stream.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
 
@@ -214,7 +215,9 @@
   UsageError("      Example: --base=0x50000000");
   UsageError("");
   UsageError("  --boot-image=<file.art>: provide the image file for the boot class path.");
+  UsageError("      Do not include the arch as part of the name, it is added automatically.");
   UsageError("      Example: --boot-image=/system/framework/boot.art");
+  UsageError("               (specifies /system/framework/<arch>/boot.art as the image file)");
   UsageError("      Default: $ANDROID_ROOT/system/framework/boot.art");
   UsageError("");
   UsageError("  --android-root=<path>: used to locate libraries for portable linking.");
@@ -494,6 +497,7 @@
       app_image_(false),
       boot_image_(false),
       is_host_(false),
+      image_writer_(nullptr),
       driver_(nullptr),
       dump_stats_(false),
       dump_passes_(false),
@@ -1408,8 +1412,36 @@
 
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
-      if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
-                             oat_file_.get())) {
+      std::unique_ptr<ElfWriter> elf_writer =
+          CreateElfWriterQuick(instruction_set_, compiler_options_.get(), oat_file_.get());
+
+      elf_writer->Start();
+
+      OutputStream* rodata = elf_writer->StartRoData();
+      if (!oat_writer->WriteRodata(rodata)) {
+        LOG(ERROR) << "Failed to write .rodata section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndRoData(rodata);
+
+      OutputStream* text = elf_writer->StartText();
+      if (!oat_writer->WriteCode(text)) {
+        LOG(ERROR) << "Failed to write .text section to the ELF file " << oat_file_->GetPath();
+        return false;
+      }
+      elf_writer->EndText(text);
+
+      elf_writer->SetBssSize(oat_writer->GetBssSize());
+
+      elf_writer->WriteDynamicSection();
+
+      ArrayRef<const dwarf::MethodDebugInfo> method_infos(oat_writer->GetMethodDebugInfo());
+      elf_writer->WriteDebugInfo(method_infos);
+
+      ArrayRef<const uintptr_t> patch_locations(oat_writer->GetAbsolutePatchLocations());
+      elf_writer->WritePatchLocations(patch_locations);
+
+      if (!elf_writer->End()) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
         return false;
       }
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index c2f23aa..2d15f6f 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -139,6 +139,7 @@
   // SPECIAL2
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 2, "mul", "DST" },
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 32, "clz", "DS" },
+  { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 33, "clo", "DS" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 0, "madd", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 1, "maddu", "ST" },
   { kSpecial2Mask | 0xffff, (28 << kOpcodeShift) | 2, "mul", "DST" },
@@ -148,13 +149,34 @@
 
   // SPECIAL3
   { kSpecial3Mask | 0x3f, (31 << kOpcodeShift) | 3, "dext", "TSAZ", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (16 << 6) | 32, "seb", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (24 << 6) | 32, "seh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 32, "bitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | 36, "dbitswap", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 36, "dsbh", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (5 << 6) | 36, "dshd", "DT", },
-  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f, (31 << kOpcodeShift) | (2 << 6) | 32, "wsbh", "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (16 << 6) | 32,
+    "seb",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (24 << 6) | 32,
+    "seh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 32,
+    "bitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | 36,
+    "dbitswap",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 36,
+    "dsbh",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (5 << 6) | 36,
+    "dshd",
+    "DT", },
+  { kSpecial3Mask | (0x1f << 21) | (0x1f << 6) | 0x3f,
+    (31 << kOpcodeShift) | (2 << 6) | 32,
+    "wsbh",
+    "DT", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x26, "sc", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x27, "scd", "Tl", },
   { kSpecial3Mask | 0x7f, (31 << kOpcodeShift) | 0x36, "ll", "Tl", },
diff --git a/imgdiag/Android.mk b/imgdiag/Android.mk
index d5d7c22..83315be 100644
--- a/imgdiag/Android.mk
+++ b/imgdiag/Android.mk
@@ -25,4 +25,8 @@
 # that the image it's analyzing be the same ISA as the runtime ISA.
 
 # Build variants {target,host} x {debug,ndebug} x {32,64}
-$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both))
+#
+# Honor HOST_PREFER_32_BIT, as building a 64-bit imgdiag executable
+# when HOST_PREFER_32_BIT is true would require an unmet dependency on
+# 64-bit libbacktrace.
+$(eval $(call build-art-multi-executable,imgdiag,$(IMGDIAG_SRC_FILES),libart-compiler libbacktrace,libcutils,libziparchive-host,art/compiler,both,$(HOST_PREFER_32_BIT)))
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d6a8c9..a926ca5 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -36,6 +36,11 @@
 static const char* kImgDiagBootImage = "--boot-image";
 static const char* kImgDiagBinaryName = "imgdiag";
 
+// from kernel <include/linux/threads.h>
+#define PID_MAX_LIMIT (4*1024*1024)  // Upper bound. Most kernel configs will have smaller max pid.
+
+static const pid_t kImgDiagGuaranteedBadPid = (PID_MAX_LIMIT + 1);
+
 class ImgDiagTest : public CommonRuntimeTest {
  protected:
   virtual void SetUp() {
@@ -132,7 +137,8 @@
 
   // Run imgdiag --image-diff-pid=some_bad_pid and wait until it's done with a 0 exit code.
   std::string error_msg;
-  ASSERT_FALSE(ExecDefaultBootImage(-12345, &error_msg)) << "Incorrectly executed";
+  ASSERT_FALSE(ExecDefaultBootImage(kImgDiagGuaranteedBadPid,
+                                    &error_msg)) << "Incorrectly executed";
   UNUSED(error_msg);
 }
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 94eb82b..5833129 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -28,6 +28,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -41,6 +42,8 @@
 #include "gc/space/space-inl.h"
 #include "image.h"
 #include "indenter.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
 #include "mapping_table.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -51,7 +54,6 @@
 #include "oat_file-inl.h"
 #include "oat_file_manager.h"
 #include "os.h"
-#include "output_stream.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
 #include "stack_map.h"
@@ -116,7 +118,7 @@
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
-        new BufferedOutputStream(new FileOutputStream(elf_file)));
+        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
     builder_.reset(new ElfBuilder<ElfTypes32>(isa, output_stream.get()));
 
     builder_->Start();
@@ -162,7 +164,7 @@
 
     builder_->End();
 
-    return builder_->Good() && output_stream->Flush();
+    return builder_->Good();
   }
 
   void Walk(Callback callback) {
@@ -1662,6 +1664,8 @@
         ImageHeader::kSectionDexCacheArrays);
     const auto& intern_section = image_header_.GetImageSection(
         ImageHeader::kSectionInternedStrings);
+    const auto& class_table_section = image_header_.GetImageSection(
+        ImageHeader::kSectionClassTable);
     stats_.header_bytes = header_bytes;
     stats_.alignment_bytes += RoundUp(header_bytes, kObjectAlignment) - header_bytes;
     // Add padding between the field and method section.
@@ -1678,6 +1682,7 @@
     stats_.art_method_bytes += method_section.Size();
     stats_.dex_cache_arrays_bytes += dex_cache_arrays_section.Size();
     stats_.interned_strings_bytes += intern_section.Size();
+    stats_.class_table_bytes += class_table_section.Size();
     stats_.Dump(os, indent_os);
     os << "\n";
 
@@ -2068,6 +2073,7 @@
     size_t art_method_bytes;
     size_t dex_cache_arrays_bytes;
     size_t interned_strings_bytes;
+    size_t class_table_bytes;
     size_t bitmap_bytes;
     size_t alignment_bytes;
 
@@ -2099,6 +2105,7 @@
           art_method_bytes(0),
           dex_cache_arrays_bytes(0),
           interned_strings_bytes(0),
+          class_table_bytes(0),
           bitmap_bytes(0),
           alignment_bytes(0),
           managed_code_bytes(0),
@@ -2261,6 +2268,7 @@
                                   "art_method_bytes       =  %8zd (%2.0f%% of art file bytes)\n"
                                   "dex_cache_arrays_bytes =  %8zd (%2.0f%% of art file bytes)\n"
                                   "interned_string_bytes  =  %8zd (%2.0f%% of art file bytes)\n"
+                                  "class_table_bytes      =  %8zd (%2.0f%% of art file bytes)\n"
                                   "bitmap_bytes           =  %8zd (%2.0f%% of art file bytes)\n"
                                   "alignment_bytes        =  %8zd (%2.0f%% of art file bytes)\n\n",
                                   header_bytes, PercentOfFileBytes(header_bytes),
@@ -2271,11 +2279,14 @@
                                   PercentOfFileBytes(dex_cache_arrays_bytes),
                                   interned_strings_bytes,
                                   PercentOfFileBytes(interned_strings_bytes),
+                                  class_table_bytes, PercentOfFileBytes(class_table_bytes),
                                   bitmap_bytes, PercentOfFileBytes(bitmap_bytes),
                                   alignment_bytes, PercentOfFileBytes(alignment_bytes))
             << std::flush;
-        CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes +
-                 dex_cache_arrays_bytes + interned_strings_bytes + bitmap_bytes + alignment_bytes);
+        CHECK_EQ(file_bytes,
+                 header_bytes + object_bytes + art_field_bytes + art_method_bytes +
+                 dex_cache_arrays_bytes + interned_strings_bytes + class_table_bytes +
+                 bitmap_bytes + alignment_bytes);
       }
 
       os << "object_bytes breakdown:\n";
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 3d9f7dc..723bb17 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -526,6 +526,20 @@
   temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots);
 }
 
+void PatchOat::PatchClassTable(const ImageHeader* image_header) {
+  const auto& section = image_header->GetImageSection(ImageHeader::kSectionClassTable);
+  // Note that we require that ReadFromMemory does not make an internal copy of the elements.
+  // This also relies on visit roots not doing any verification which could fail after we update
+  // the roots to be the image addresses.
+  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  ClassTable temp_table;
+  temp_table.ReadFromMemory(image_->Begin() + section.Offset());
+  FixupRootVisitor visitor(this);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&visitor, RootInfo(kRootUnknown));
+  temp_table.VisitRoots(buffered_visitor);
+}
+
+
 class RelocatedPointerVisitor {
  public:
   explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
@@ -606,6 +620,7 @@
   PatchArtFields(image_header);
   PatchArtMethods(image_header);
   PatchInternedStrings(image_header);
+  PatchClassTable(image_header);
   // Patch dex file int/long arrays which point to ArtFields.
   PatchDexFileArrays(img_roots);
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 0915014..38bd865 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -116,6 +116,8 @@
   void PatchArtMethods(const ImageHeader* image_header) SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchInternedStrings(const ImageHeader* image_header)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  void PatchClassTable(const ImageHeader* image_header)
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 0b0f094..74cc899 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -27,6 +27,7 @@
   base/arena_allocator.cc \
   base/arena_bit_vector.cc \
   base/bit_vector.cc \
+  base/file_magic.cc \
   base/hex_dump.cc \
   base/logging.cc \
   base/mutex.cc \
@@ -103,6 +104,7 @@
   jit/jit.cc \
   jit/jit_code_cache.cc \
   jit/jit_instrumentation.cc \
+  jit/offline_profiling_info.cc \
   jit/profiling_info.cc \
   lambda/art_lambda_method.cc \
   lambda/box_table.cc \
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index afa48cd..2cb2212 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -431,9 +431,9 @@
           [referrer] "r"(referrer), [hidden] "r"(hidden)
         : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
           "t8", "t9", "k0", "k1", "fp", "ra",
-          "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13",
-          "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26",
-          "f27", "f28", "f29", "f30", "f31",
+          "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11",
+          "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22",
+          "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31",
           "memory");  // clobber.
 #elif defined(__x86_64__) && !defined(__APPLE__)
 #define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t"
@@ -1193,7 +1193,8 @@
 
 
 TEST_F(StubTest, StringCompareTo) {
-#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || \
+    (defined(__mips__) && defined(__LP64__)) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -2042,7 +2043,7 @@
 }
 
 TEST_F(StubTest, StringIndexOf) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || (defined(__mips__) && defined(__LP64__))
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f7ed812..238d9f3 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -106,17 +106,16 @@
   return num_registers;
 }
 
-static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
-    SHARED_REQUIRES(Locks::mutator_lock_) {
+bool ArtMethod::HasSameNameAndSignature(ArtMethod* other) {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
-  const DexFile* dex_file = method1->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
-  if (method1->GetDexCache() == method2->GetDexCache()) {
-    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(GetDexMethodIndex());
+  if (GetDexCache() == other->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(other->GetDexMethodIndex());
     return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
   }
-  const DexFile* dex_file2 = method2->GetDexFile();
-  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  const DexFile* dex_file2 = other->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(other->GetDexMethodIndex());
   if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
     return false;  // Name mismatch.
   }
@@ -149,8 +148,7 @@
         mirror::Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
           ArtMethod* interface_method = interface->GetVirtualMethod(j, pointer_size);
-          if (HasSameNameAndSignature(
-              this, interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
+          if (HasSameNameAndSignature(interface_method->GetInterfaceMethodIfProxy(sizeof(void*)))) {
             result = interface_method;
             break;
           }
@@ -158,8 +156,9 @@
       }
     }
   }
-  DCHECK(result == nullptr || HasSameNameAndSignature(
-      GetInterfaceMethodIfProxy(sizeof(void*)), result->GetInterfaceMethodIfProxy(sizeof(void*))));
+  DCHECK(result == nullptr ||
+         GetInterfaceMethodIfProxy(sizeof(void*))->HasSameNameAndSignature(
+             result->GetInterfaceMethodIfProxy(sizeof(void*))));
   return result;
 }
 
@@ -299,7 +298,9 @@
         ShadowFrame* shadow_frame =
             self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
         mirror::Throwable* pending_exception = nullptr;
-        self->PopDeoptimizationContext(result, &pending_exception);
+        bool from_code = false;
+        self->PopDeoptimizationContext(result, &pending_exception, &from_code);
+        CHECK(!from_code);
         self->SetTopOfStack(nullptr);
         self->SetTopOfShadowStack(shadow_frame);
 
@@ -308,7 +309,7 @@
         if (pending_exception != nullptr) {
           self->SetException(pending_exception);
         }
-        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
+        interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
       }
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 5a2d6c3..8efad88 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -263,6 +263,9 @@
   mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve, size_t ptr_size)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Returns true if this method has the same name and signature of the other method.
+  bool HasSameNameAndSignature(ArtMethod* other) SHARED_REQUIRES(Locks::mutator_lock_);
+
   // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod(size_t pointer_size) SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index d6a44f7..8430d68 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -23,6 +23,7 @@
 
 #include "base/logging.h"
 #include "base/iteration_range.h"
+#include "base/stl_util.h"
 
 namespace art {
 
@@ -108,12 +109,12 @@
 }
 
 // For rounding integers.
-// NOTE: In the absence of std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>.
+// Note: Omit the `n` from T type deduction, deduce only from the `x` argument.
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) WARN_UNUSED;
+static constexpr T RoundDown(T x, typename Identity<T>::type n) WARN_UNUSED;
 
 template<typename T>
-static constexpr T RoundDown(T x, typename std::decay<T>::type n) {
+static constexpr T RoundDown(T x, typename Identity<T>::type n) {
   return
       DCHECK_CONSTEXPR(IsPowerOfTwo(n), , T(0))
       (x & -n);
diff --git a/runtime/base/file_magic.cc b/runtime/base/file_magic.cc
new file mode 100644
index 0000000..9756338
--- /dev/null
+++ b/runtime/base/file_magic.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "file_magic.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "base/logging.h"
+#include "dex_file.h"
+#include "stringprintf.h"
+
+namespace art {
+
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
+  CHECK(magic != nullptr);
+  ScopedFd fd(open(filename, O_RDONLY, 0));
+  if (fd.get() == -1) {
+    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
+    return ScopedFd();
+  }
+  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
+  if (n != sizeof(*magic)) {
+    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
+    return ScopedFd();
+  }
+  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
+    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
+                              strerror(errno));
+    return ScopedFd();
+  }
+  return fd;
+}
+
+bool IsZipMagic(uint32_t magic) {
+  return (('P' == ((magic >> 0) & 0xff)) &&
+          ('K' == ((magic >> 8) & 0xff)));
+}
+
+bool IsDexMagic(uint32_t magic) {
+  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
+}
+
+}  // namespace art
diff --git a/runtime/base/file_magic.h b/runtime/base/file_magic.h
new file mode 100644
index 0000000..f7e4bad
--- /dev/null
+++ b/runtime/base/file_magic.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_FILE_MAGIC_H_
+#define ART_RUNTIME_BASE_FILE_MAGIC_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "ScopedFd.h"
+
+namespace art {
+
+// Open file and read magic number
+ScopedFd OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg);
+
+// Check whether the given magic matches a known file type.
+bool IsZipMagic(uint32_t magic);
+bool IsDexMagic(uint32_t magic);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_FILE_MAGIC_H_
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 95baa82..fc1a52f 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -236,7 +236,7 @@
 
   // Returns how large the table is after being written. If target is null, then no writing happens
   // but the size is still returned. Target must be 8 byte aligned.
-  size_t WriteToMemory(uint8_t* ptr) {
+  size_t WriteToMemory(uint8_t* ptr) const {
     size_t offset = 0;
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_));
     offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_));
@@ -457,7 +457,7 @@
   }
 
   // Make sure that everything reinserts in the right spot. Returns the number of errors.
-  size_t Verify() {
+  size_t Verify() NO_THREAD_SAFETY_ANALYSIS {
     size_t errors = 0;
     for (size_t i = 0; i < num_buckets_; ++i) {
       T& element = data_[i];
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index 324ab21..ad03c31 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -156,6 +156,23 @@
   }
 };
 
+// Use to suppress type deduction for a function argument.
+// See std::identity<> for more background:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
+//
+// e.g. "template <typename X> void bar(identity<X>::type foo);
+//     bar(5); // compilation error
+//     bar<int>(5); // ok
+// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
+//     Base b;
+//     Derived d;
+//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
+// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
+template <typename T>
+struct Identity {
+  using type = T;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_STL_UTIL_H_
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 07cadc4..78bc3d5 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -17,12 +17,22 @@
 #include "base/unix_file/fd_file.h"
 
 #include <errno.h>
+#include <limits>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
 #include "base/logging.h"
 
+// Includes needed for FdFile::Copy().
+#ifdef __linux__
+#include <sys/sendfile.h>
+#else
+#include <algorithm>
+#include "base/stl_util.h"
+#include "globals.h"
+#endif
+
 namespace unix_file {
 
 FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
@@ -222,6 +232,52 @@
   return true;
 }
 
+bool FdFile::Copy(FdFile* input_file, int64_t offset, int64_t size) {
+  off_t off = static_cast<off_t>(offset);
+  off_t sz = static_cast<off_t>(size);
+  if (offset < 0 || static_cast<int64_t>(off) != offset ||
+      size < 0 || static_cast<int64_t>(sz) != size ||
+      sz > std::numeric_limits<off_t>::max() - off) {
+    errno = EINVAL;
+    return false;
+  }
+  if (size == 0) {
+    return true;
+  }
+#ifdef __linux__
+  // Use sendfile(), available for files since linux kernel 2.6.33.
+  off_t end = off + sz;
+  while (off != end) {
+    int result = TEMP_FAILURE_RETRY(
+        sendfile(Fd(), input_file->Fd(), &off, end - off));
+    if (result == -1) {
+      return false;
+    }
+    // Ignore the number of bytes in `result`, sendfile() already updated `off`.
+  }
+#else
+  if (lseek(input_file->Fd(), off, SEEK_SET) != off) {
+    return false;
+  }
+  constexpr size_t kMaxBufferSize = 4 * ::art::kPageSize;
+  const size_t buffer_size = std::min<uint64_t>(size, kMaxBufferSize);
+  art::UniqueCPtr<void> buffer(malloc(buffer_size));
+  if (buffer == nullptr) {
+    errno = ENOMEM;
+    return false;
+  }
+  while (size != 0) {
+    size_t chunk_size = std::min<uint64_t>(buffer_size, size);
+    if (!input_file->ReadFully(buffer.get(), chunk_size) ||
+        !WriteFully(buffer.get(), chunk_size)) {
+      return false;
+    }
+    size -= chunk_size;
+  }
+#endif
+  return true;
+}
+
 void FdFile::Erase() {
   TEMP_FAILURE_RETRY(SetLength(0));
   TEMP_FAILURE_RETRY(Flush());
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index f47368b..231a1ab 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -50,12 +50,12 @@
   bool Open(const std::string& file_path, int flags, mode_t mode);
 
   // RandomAccessFile API.
-  virtual int Close() WARN_UNUSED;
-  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const WARN_UNUSED;
-  virtual int SetLength(int64_t new_length) WARN_UNUSED;
-  virtual int64_t GetLength() const;
-  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset) WARN_UNUSED;
-  virtual int Flush() WARN_UNUSED;
+  int Close() OVERRIDE WARN_UNUSED;
+  int64_t Read(char* buf, int64_t byte_count, int64_t offset) const OVERRIDE WARN_UNUSED;
+  int SetLength(int64_t new_length) OVERRIDE WARN_UNUSED;
+  int64_t GetLength() const OVERRIDE;
+  int64_t Write(const char* buf, int64_t byte_count, int64_t offset) OVERRIDE WARN_UNUSED;
+  int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
   void Erase();
@@ -77,6 +77,9 @@
   bool PreadFully(void* buffer, size_t byte_count, size_t offset) WARN_UNUSED;
   bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
 
+  // Copy data from another file.
+  bool Copy(FdFile* input_file, int64_t offset, int64_t size);
+
   // This enum is public so that we can define the << operator over it.
   enum class GuardState {
     kBase,           // Base, file has not been flushed or closed.
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 388f717..ecf607c 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -110,4 +110,34 @@
   ASSERT_EQ(file.Close(), 0);
 }
 
+TEST_F(FdFileTest, Copy) {
+  art::ScratchFile src_tmp;
+  FdFile src;
+  ASSERT_TRUE(src.Open(src_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(src.Fd(), 0);
+  ASSERT_TRUE(src.IsOpened());
+
+  char src_data[] = "Some test data.";
+  ASSERT_TRUE(src.WriteFully(src_data, sizeof(src_data)));  // Including the zero terminator.
+  ASSERT_EQ(0, src.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), src.GetLength());
+
+  art::ScratchFile dest_tmp;
+  FdFile dest;
+  ASSERT_TRUE(dest.Open(src_tmp.GetFilename(), O_RDWR));
+  ASSERT_GE(dest.Fd(), 0);
+  ASSERT_TRUE(dest.IsOpened());
+
+  ASSERT_TRUE(dest.Copy(&src, 0, sizeof(src_data)));
+  ASSERT_EQ(0, dest.Flush());
+  ASSERT_EQ(static_cast<int64_t>(sizeof(src_data)), dest.GetLength());
+
+  char check_data[sizeof(src_data)];
+  ASSERT_TRUE(dest.PreadFully(check_data, sizeof(src_data), 0u));
+  CHECK_EQ(0, memcmp(check_data, src_data, sizeof(src_data)));
+
+  ASSERT_EQ(0, dest.Close());
+  ASSERT_EQ(0, src.Close());
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 82e5d2e..531cb37 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -19,8 +19,11 @@
 
 #include <memory.h>
 #include <map>
+#include <type_traits>
 #include <utility>
 
+#include "base/stl_util.h"
+
 namespace art {
 
 //
@@ -268,8 +271,9 @@
   }
 
   // Set a value for a given key, overwriting the previous value if any.
+  // Note: Omit the `value` from TValue type deduction, deduce only from the `key` argument.
   template <typename TValue>
-  void Set(const TKey<TValue>& key, const TValue& value) {
+  void Set(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     // Clone the value first, to protect against &value == GetValuePtr(key).
     auto* new_value = new TValue(value);
 
@@ -279,8 +283,9 @@
 
   // Set a value for a given key, only if there was no previous value before.
   // Returns true if the value was set, false if a previous value existed.
+  // Note: Omit the `value` from TValue type deduction, deduce only from the `key` argument.
   template <typename TValue>
-  bool SetIfMissing(const TKey<TValue>& key, const TValue& value) {
+  bool SetIfMissing(const TKey<TValue>& key, const typename Identity<TValue>::type& value) {
     TValue* ptr = Get(key);
     if (ptr == nullptr) {
       Set(key, value);
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 88a3996..a5d10b2 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -116,6 +116,7 @@
   return resolved_method;
 }
 
+template <ClassLinker::ResolveMode kResolveMode>
 inline ArtMethod* ClassLinker::ResolveMethod(Thread* self,
                                              uint32_t method_idx,
                                              ArtMethod* referrer,
@@ -127,12 +128,12 @@
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile* dex_file = h_dex_cache->GetDexFile();
-    resolved_method = ResolveMethod(*dex_file,
-                                    method_idx,
-                                    h_dex_cache,
-                                    h_class_loader,
-                                    referrer,
-                                    type);
+    resolved_method = ResolveMethod<kResolveMode>(*dex_file,
+                                                  method_idx,
+                                                  h_dex_cache,
+                                                  h_class_loader,
+                                                  referrer,
+                                                  type);
   }
   // Note: We cannot check here to see whether we added the method to the cache. It
   //       might be an erroneous class, which results in it being hidden from us.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 2dd2a83..0a37f26 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -1017,6 +1017,15 @@
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
+  const ImageHeader& header = space->GetImageHeader();
+  const ImageSection& section = header.GetImageSection(ImageHeader::kSectionClassTable);
+  if (section.Size() > 0u) {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    ClassTable* const class_table = InsertClassTableForClassLoader(nullptr);
+    class_table->ReadFromMemory(space->Begin() + section.Offset());
+    dex_cache_boot_image_class_lookup_required_ = false;
+  }
+
   FinishInit(self);
 
   VLOG(startup) << "ClassLinker::InitFromImage exiting";
@@ -2786,9 +2795,11 @@
   Thread* self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
   ScopedAssertNoThreadSuspension ants(self, "Moving image classes to class table");
+
+  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+
   mirror::ObjectArray<mirror::DexCache>* dex_caches = GetImageDexCaches(image_space);
   std::string temp;
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
     mirror::DexCache* dex_cache = dex_caches->Get(i);
     GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
@@ -6149,6 +6160,7 @@
   return resolved;
 }
 
+template <ClassLinker::ResolveMode kResolveMode>
 ArtMethod* ClassLinker::ResolveMethod(const DexFile& dex_file,
                                       uint32_t method_idx,
                                       Handle<mirror::DexCache> dex_cache,
@@ -6160,6 +6172,12 @@
   ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx, image_pointer_size_);
   if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
     DCHECK(resolved->GetDeclaringClassUnchecked() != nullptr) << resolved->GetDexMethodIndex();
+    if (kResolveMode == ClassLinker::kForceICCECheck) {
+      if (resolved->CheckIncompatibleClassChange(type)) {
+        ThrowIncompatibleClassChangeError(type, resolved->GetInvokeType(), resolved, referrer);
+        return nullptr;
+      }
+    }
     return resolved;
   }
   // Fail, get the declaring class.
@@ -6178,8 +6196,36 @@
       DCHECK(resolved == nullptr || resolved->GetDeclaringClassUnchecked() != nullptr);
       break;
     case kInterface:
-      resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
-      DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      // We have to check whether the method id really belongs to an interface (dex static bytecode
+      // constraint A15). Otherwise you must not invoke-interface on it.
+      //
+      // This is not symmetric to A12-A14 (direct, static, virtual), as using FindInterfaceMethod
+      // assumes that the given type is an interface, and will check the interface table if the
+      // method isn't declared in the class. So it may find an interface method (usually by name
+      // in the handling below, but we do the constraint check early). In that case,
+      // CheckIncompatibleClassChange will succeed (as it is called on an interface method)
+      // unexpectedly.
+      // Example:
+      //    interface I {
+      //      foo()
+      //    }
+      //    class A implements I {
+      //      ...
+      //    }
+      //    class B extends A {
+      //      ...
+      //    }
+      //    invoke-interface B.foo
+      //      -> FindInterfaceMethod finds I.foo (interface method), not A.foo (miranda method)
+      if (UNLIKELY(!klass->IsInterface())) {
+        ThrowIncompatibleClassChangeError(klass,
+                                          "Found class %s, but interface was expected",
+                                          PrettyDescriptor(klass).c_str());
+        return nullptr;
+      } else {
+        resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
+        DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
+      }
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
@@ -6781,4 +6827,20 @@
   }
 }
 
+// Instantiate ResolveMethod.
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+    const DexFile& dex_file,
+    uint32_t method_idx,
+    Handle<mirror::DexCache> dex_cache,
+    Handle<mirror::ClassLoader> class_loader,
+    ArtMethod* referrer,
+    InvokeType type);
+
 }  // namespace art
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 29aac31..0d3bc1e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -246,11 +246,19 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
+  // Determine whether a dex cache result should be trusted, or an IncompatibleClassChangeError
+  // check should be performed even after a hit.
+  enum ResolveMode {  // private.
+    kNoICCECheckForCache,
+    kForceICCECheck
+  };
+
   // Resolve a method with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. What is unique is the method type argument which
   // is used to determine if this method is a direct, static, or
   // virtual method.
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(const DexFile& dex_file,
                            uint32_t method_idx,
                            Handle<mirror::DexCache> dex_cache,
@@ -262,6 +270,7 @@
 
   ArtMethod* GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer)
       SHARED_REQUIRES(Locks::mutator_lock_);
+  template <ResolveMode kResolveMode>
   ArtMethod* ResolveMethod(Thread* self, uint32_t method_idx, ArtMethod* referrer, InvokeType type)
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 3ed1c95..df2dbf4 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -115,7 +115,7 @@
   return false;
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const GcRoot<mirror::Class>& root)
     const {
   std::string temp;
   return ComputeModifiedUtf8Hash(root.Read()->GetDescriptor(&temp));
@@ -133,7 +133,7 @@
   return a.Read()->DescriptorEquals(descriptor);
 }
 
-std::size_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
+uint32_t ClassTable::ClassDescriptorHashEquals::operator()(const char* descriptor) const {
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
@@ -148,4 +148,29 @@
   return true;
 }
 
+size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
+  ClassSet combined;
+  // Combine all the class sets in case there are multiple, also adjusts load factor back to
+  // default in case classes were pruned.
+  for (const ClassSet& class_set : classes_) {
+    for (const GcRoot<mirror::Class>& root : class_set) {
+      combined.Insert(root);
+    }
+  }
+  const size_t ret = combined.WriteToMemory(ptr);
+  // Sanity check.
+  if (kIsDebugBuild && ptr != nullptr) {
+    size_t read_count;
+    ClassSet class_set(ptr, /*make copy*/false, &read_count);
+    class_set.Verify();
+  }
+  return ret;
+}
+
+size_t ClassTable::ReadFromMemory(uint8_t* ptr) {
+  size_t read_count = 0;
+  classes_.insert(classes_.begin(), ClassSet(ptr, /*make copy*/false, &read_count));
+  return read_count;
+}
+
 }  // namespace art
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 002bb56..c911365 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -104,17 +104,27 @@
       REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  // Combines all of the tables into one class set.
+  size_t WriteToMemory(uint8_t* ptr) const
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  size_t ReadFromMemory(uint8_t* ptr)
+      REQUIRES(Locks::classlinker_classes_lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
  private:
   class ClassDescriptorHashEquals {
    public:
+    // uint32_t for cross compilation.
+    uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
-    std::size_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
         NO_THREAD_SAFETY_ANALYSIS;;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
-    std::size_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
+    // uint32_t for cross compilation.
+    uint32_t operator()(const char* descriptor) const NO_THREAD_SAFETY_ANALYSIS;
   };
   class GcRootEmptyFn {
    public:
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 32e77b7..51f57c3 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -69,10 +69,25 @@
   return alloc_record_count;
 }
 
+// Takes a method and returns a 'canonical' one if the method is default (and therefore potentially
+// copied from some other class). This ensures that the debugger does not get confused as to which
+// method we are in.
+static ArtMethod* GetCanonicalMethod(ArtMethod* m)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  if (LIKELY(!m->IsDefault())) {
+    return m;
+  } else {
+    mirror::Class* declaring_class = m->GetDeclaringClass();
+    return declaring_class->FindDeclaredVirtualMethod(declaring_class->GetDexCache(),
+                                                      m->GetDexMethodIndex(),
+                                                      sizeof(void*));
+  }
+}
+
 class Breakpoint : public ValueObject {
  public:
   Breakpoint(ArtMethod* method, uint32_t dex_pc, DeoptimizationRequest::Kind deoptimization_kind)
-    : method_(method),
+    : method_(GetCanonicalMethod(method)),
       dex_pc_(dex_pc),
       deoptimization_kind_(deoptimization_kind) {
     CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
@@ -99,6 +114,12 @@
     return deoptimization_kind_;
   }
 
+  // Returns true if the method of this breakpoint and the passed in method should be considered the
+  // same. That is, they are either the same method or they are copied from the same method.
+  bool IsInMethod(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_) {
+    return method_ == GetCanonicalMethod(m);
+  }
+
  private:
   // The location of this breakpoint.
   ArtMethod* method_;
@@ -306,12 +327,12 @@
   return dex_pcs_.find(dex_pc) == dex_pcs_.end();
 }
 
-static bool IsBreakpoint(const ArtMethod* m, uint32_t dex_pc)
+static bool IsBreakpoint(ArtMethod* m, uint32_t dex_pc)
     REQUIRES(!Locks::breakpoint_lock_)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   ReaderMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
     }
@@ -1282,9 +1303,9 @@
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
-static JDWP::MethodId ToMethodId(const ArtMethod* m)
+static JDWP::MethodId ToMethodId(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_) {
-  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
+  return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(GetCanonicalMethod(m)));
 }
 
 static ArtField* FromFieldId(JDWP::FieldId fid)
@@ -2763,7 +2784,7 @@
   if (m == nullptr) {
     memset(location, 0, sizeof(*location));
   } else {
-    location->method = m;
+    location->method = GetCanonicalMethod(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint32_t>(-1) : dex_pc;
   }
 }
@@ -3214,7 +3235,7 @@
 static const Breakpoint* FindFirstBreakpointForMethod(ArtMethod* m)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       return &breakpoint;
     }
   }
@@ -3231,7 +3252,7 @@
                                            DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_REQUIRES(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    if (breakpoint.Method() == m) {
+    if (breakpoint.IsInMethod(m)) {
       CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
     }
   }
@@ -3274,12 +3295,15 @@
 
   if (first_breakpoint == nullptr) {
     // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // inlined or default, we deoptimize everything; otherwise we deoptimize only this method. We
+    // deoptimize with defaults because we do not know everywhere they are used. It is possible some
+    // of the copies could be inlined or otherwise missed.
+    // TODO Deoptimizing on default methods might not be necessary in all cases.
     // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
     // Therefore we must not hold any lock when we call it.
-    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    bool need_full_deoptimization = m->IsDefault() || IsMethodPossiblyInlined(self, m);
     if (need_full_deoptimization) {
-      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining or copying of method "
                  << PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
@@ -3359,7 +3383,7 @@
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
   DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
-    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
+    if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].IsInMethod(m)) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
       deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
       DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 4163e2e..30d921a 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,6 +29,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/file_magic.h"
 #include "base/hash_map.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -62,26 +63,6 @@
 const uint8_t DexFile::kDexMagic[] = { 'd', 'e', 'x', '\n' };
 const uint8_t DexFile::kDexMagicVersion[] = { '0', '3', '5', '\0' };
 
-static int OpenAndReadMagic(const char* filename, uint32_t* magic, std::string* error_msg) {
-  CHECK(magic != nullptr);
-  ScopedFd fd(open(filename, O_RDONLY, 0));
-  if (fd.get() == -1) {
-    *error_msg = StringPrintf("Unable to open '%s' : %s", filename, strerror(errno));
-    return -1;
-  }
-  int n = TEMP_FAILURE_RETRY(read(fd.get(), magic, sizeof(*magic)));
-  if (n != sizeof(*magic)) {
-    *error_msg = StringPrintf("Failed to find magic in '%s'", filename);
-    return -1;
-  }
-  if (lseek(fd.get(), 0, SEEK_SET) != 0) {
-    *error_msg = StringPrintf("Failed to seek to beginning of file '%s' : %s", filename,
-                              strerror(errno));
-    return -1;
-  }
-  return fd.release();
-}
-
 bool DexFile::GetChecksum(const char* filename, uint32_t* checksum, std::string* error_msg) {
   CHECK(checksum != nullptr);
   uint32_t magic;
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 438b6b8..3f62124 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -189,8 +189,17 @@
         case CONST_STRING:
           if (file != nullptr) {
             uint32_t string_idx = VRegB_21c();
-            os << StringPrintf("const-string v%d, %s // string@%d", VRegA_21c(),
-                               PrintableString(file->StringDataByIdx(string_idx)).c_str(), string_idx);
+            if (string_idx < file->NumStringIds()) {
+              os << StringPrintf("const-string v%d, %s // string@%d",
+                                 VRegA_21c(),
+                                 PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                                 string_idx);
+            } else {
+              os << StringPrintf("const-string v%d, <<invalid-string-idx-%d>> // string@%d",
+                                 VRegA_21c(),
+                                 string_idx,
+                                 string_idx);
+            }
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -348,9 +357,19 @@
       if (Opcode() == CONST_STRING_JUMBO) {
         uint32_t string_idx = VRegB_31c();
         if (file != nullptr) {
-          os << StringPrintf("%s v%d, %s // string@%d", opcode, VRegA_31c(),
-                             PrintableString(file->StringDataByIdx(string_idx)).c_str(),
-                             string_idx);
+          if (string_idx < file->NumStringIds()) {
+            os << StringPrintf("%s v%d, %s // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               PrintableString(file->StringDataByIdx(string_idx)).c_str(),
+                               string_idx);
+          } else {
+            os << StringPrintf("%s v%d, <<invalid-string-idx-%d>> // string@%d",
+                               opcode,
+                               VRegA_31c(),
+                               string_idx,
+                               string_idx);
+          }
         } else {
           os << StringPrintf("%s v%d, string@%d", opcode, VRegA_31c(), string_idx);
         }
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 21e4e44..ba2fb94 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -68,7 +68,7 @@
     class_loader.Assign(caller->GetClassLoader());
   }
 
-  return class_linker->ResolveMethod(
+  return class_linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
       *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type);
 }
 
@@ -401,7 +401,10 @@
     mirror::Object* null_this = nullptr;
     HandleWrapper<mirror::Object> h_this(
         hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
-    resolved_method = class_linker->ResolveMethod(self, method_idx, referrer, type);
+    constexpr ClassLinker::ResolveMode resolve_mode =
+        access_check ? ClassLinker::kForceICCECheck
+                     : ClassLinker::kNoICCECheckForCache;
+    resolved_method = class_linker->ResolveMethod<resolve_mode>(self, method_idx, referrer, type);
   }
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
@@ -598,8 +601,12 @@
   } else if (type == kStatic || type == kDirect) {
     return resolved_method;
   } else if (type == kSuper) {
-    return referrer->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
-        resolved_method->GetMethodIndex(), sizeof(void*));
+    mirror::Class* super_class = referrer->GetDeclaringClass()->GetSuperClass();
+    if (resolved_method->GetMethodIndex() >= super_class->GetVTableLength()) {
+      // The super class does not have the method.
+      return nullptr;
+    }
+    return super_class->GetVTableEntry(resolved_method->GetMethodIndex(), sizeof(void*));
   } else {
     DCHECK(type == kVirtual);
     return this_object->GetClass()->GetVTableEntry(
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index dfd9fcd..c019cae 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -52,7 +52,7 @@
   // Before deoptimizing to interpreter, we must push the deoptimization context.
   JValue return_value;
   return_value.SetJ(0);  // we never deoptimize from compiled code with an invoke result.
-  self->PushDeoptimizationContext(return_value, false, self->GetException());
+  self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException());
 
   QuickExceptionHandler exception_handler(self, true);
   exception_handler.DeoptimizeSingleFrame();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index abf9ac4..08c9b49 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -685,7 +685,9 @@
     }
 
     mirror::Throwable* pending_exception = nullptr;
-    self->PopDeoptimizationContext(&result, &pending_exception);
+    bool from_code = false;
+    self->PopDeoptimizationContext(&result, &pending_exception, /* out */ &from_code);
+    CHECK(from_code);
 
     // Push a transition back into managed code onto the linked list in thread.
     self->PushManagedStackFragment(&fragment);
@@ -712,7 +714,7 @@
     if (pending_exception != nullptr) {
       self->SetException(pending_exception);
     }
-    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, &result);
+    interpreter::EnterInterpreterFromDeoptimize(self, deopt_frame, from_code, &result);
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension(
         "Building interpreter shadow frame");
@@ -754,7 +756,8 @@
   if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) {
     // Push the context of the deoptimization stack so we can restore the return value and the
     // exception before executing the deoptimized frames.
-    self->PushDeoptimizationContext(result, shorty[0] == 'L', self->GetException());
+    self->PushDeoptimizationContext(
+        result, shorty[0] == 'L', /* from_code */ false, self->GetException());
 
     // Set special exception to cause deoptimization.
     self->SetException(Thread::GetDeoptimizationException());
@@ -1012,22 +1015,29 @@
     HandleWrapper<mirror::Object> h_receiver(
         hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
     DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
-    called = linker->ResolveMethod(self, called_method.dex_method_index, caller, invoke_type);
+    called = linker->ResolveMethod<ClassLinker::kForceICCECheck>(
+        self, called_method.dex_method_index, caller, invoke_type);
   }
   const void* code = nullptr;
   if (LIKELY(!self->IsExceptionPending())) {
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
         << PrettyMethod(called) << " " << invoke_type;
-    if (virtual_or_interface) {
-      // Refine called method based on receiver.
-      CHECK(receiver != nullptr) << invoke_type;
-
+    if (virtual_or_interface || invoke_type == kSuper) {
+      // Refine called method based on receiver for kVirtual/kInterface, and
+      // caller for kSuper.
       ArtMethod* orig_called = called;
       if (invoke_type == kVirtual) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForVirtual(called, sizeof(void*));
-      } else {
+      } else if (invoke_type == kInterface) {
+        CHECK(receiver != nullptr) << invoke_type;
         called = receiver->GetClass()->FindVirtualMethodForInterface(called, sizeof(void*));
+      } else {
+        DCHECK_EQ(invoke_type, kSuper);
+        CHECK(caller != nullptr) << invoke_type;
+        called = caller->GetDeclaringClass()->GetSuperClass()->GetVTableEntry(
+            called->GetMethodIndex(), sizeof(void*));
       }
 
       CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 9c8e4df..7d00094 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1526,10 +1526,9 @@
   }
 }
 
+// Below may be called by mutator itself just before thread termination.
 size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
-  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
-  ReaderMutexLock wmu(self, bulk_free_lock_);
   size_t free_bytes = 0U;
   for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
@@ -1544,10 +1543,17 @@
       // Count the number of free slots left.
       size_t num_free_slots = thread_local_run->NumberOfFreeSlots();
       free_bytes += num_free_slots * bracketSizes[idx];
+      // The above bracket index lock guards thread local free list to avoid race condition
+      // with unioning bulk free list to thread local free list by GC thread in BulkFree.
+      // If thread local run is true, GC thread will help update thread local free list
+      // in BulkFree. And the latest thread local free list will be merged to free list
+      // either when this thread local run is full or when revoking this run here. In this
+      // case the free list wll be updated. If thread local run is false, GC thread will help
+      // merge bulk free list in next BulkFree.
+      // Thus no need to merge bulk free list to free list again here.
       bool dont_care;
       thread_local_run->MergeThreadLocalFreeListToFreeList(&dont_care);
       thread_local_run->SetIsThreadLocal(false);
-      thread_local_run->MergeBulkFreeListToFreeList();
       DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end());
       DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end());
       RevokeRun(self, idx, thread_local_run);
diff --git a/runtime/image.cc b/runtime/image.cc
index 1bc19ff..2eac3fb 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '2', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/image.h b/runtime/image.h
index 555cf5d..a16f3c9 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -170,6 +170,7 @@
     kSectionArtMethods,
     kSectionDexCacheArrays,
     kSectionInternedStrings,
+    kSectionClassTable,
     kSectionImageBitmap,
     kSectionCount,  // Number of elements in enum.
   };
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index d13526b..2d0ae63 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -344,8 +344,11 @@
     segment_state_.all = new_state;
   }
 
-  static Offset SegmentStateOffset() {
-    return Offset(OFFSETOF_MEMBER(IndirectReferenceTable, segment_state_));
+  static Offset SegmentStateOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
+    // Note: Currently segment_state_ is at offset 0. We're testing the expected value in
+    //       jni_internal_test to make sure it stays correct. It is not OFFSETOF_MEMBER, as that
+    //       is not pointer-size-safe.
+    return Offset(0);
   }
 
   // Release pages past the end of the table that may have previously held references.
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index bc2c197..264cd2c 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1062,7 +1062,9 @@
                                 PrettyMethod(method).c_str(),
                                 return_value.GetJ()) << *self;
     }
-    self->PushDeoptimizationContext(return_value, return_shorty == 'L',
+    self->PushDeoptimizationContext(return_value,
+                                    return_shorty == 'L',
+                                    false /* from_code */,
                                     nullptr /* no pending exception */);
     return GetTwoWordSuccessValue(*return_pc,
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d686f74..871fad7 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -397,7 +397,10 @@
   self->PopShadowFrame();
 }
 
-void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, JValue* ret_val)
+void EnterInterpreterFromDeoptimize(Thread* self,
+                                    ShadowFrame* shadow_frame,
+                                    bool from_code,
+                                    JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_) {
   JValue value;
   // Set value to last known result in case the shadow frame chain is empty.
@@ -408,7 +411,7 @@
     self->SetTopOfShadowStack(shadow_frame);
     const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
     const uint32_t dex_pc = shadow_frame->GetDexPC();
-    uint32_t new_dex_pc;
+    uint32_t new_dex_pc = dex_pc;
     if (UNLIKELY(self->IsExceptionPending())) {
       // If we deoptimize from the QuickExceptionHandler, we already reported the exception to
       // the instrumentation. To prevent from reporting it a second time, we simply pass a
@@ -419,11 +422,16 @@
                                                                     instrumentation);
       new_dex_pc = found_dex_pc;  // the dex pc of a matching catch handler
                                   // or DexFile::kDexNoIndex if there is none.
-    } else {
-      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-      // For an invoke, use the dex pc of the next instruction.
+    } else if (!from_code) {
+      // For the debugger and full deoptimization stack, we must go past the invoke
+      // instruction, as it already executed.
       // TODO: should be tested more once b/17586779 is fixed.
-      new_dex_pc = dex_pc + (instr->IsInvoke() ? instr->SizeInCodeUnits() : 0);
+      const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+      DCHECK(instr->IsInvoke());
+      new_dex_pc = dex_pc + instr->SizeInCodeUnits();
+    } else {
+      // Nothing to do, the dex_pc is the one at which the code requested
+      // the deoptimization.
     }
     if (new_dex_pc != DexFile::kDexNoIndex) {
       shadow_frame->SetDexPC(new_dex_pc);
@@ -432,6 +440,8 @@
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     ShadowFrame::DeleteDeoptimizedFrame(old_frame);
+    // Following deoptimizations of shadow frames must pass the invoke instruction.
+    from_code = false;
     first = false;
   }
   ret_val->SetJ(value.GetJ());
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index b21ea84..8e7f3da 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -37,7 +37,8 @@
                                        mirror::Object* receiver, uint32_t* args, JValue* result)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
-extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame,
+// 'from_code' denotes whether the deoptimization was explicitly triggered by compiled code.
+extern void EnterInterpreterFromDeoptimize(Thread* self, ShadowFrame* shadow_frame, bool from_code,
                                            JValue* ret_val)
     SHARED_REQUIRES(Locks::mutator_lock_);
 
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ecbf13c..92aa86e 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -24,6 +24,8 @@
 #include "interpreter/interpreter.h"
 #include "jit_code_cache.h"
 #include "jit_instrumentation.h"
+#include "oat_file_manager.h"
+#include "offline_profiling_info.h"
 #include "runtime.h"
 #include "runtime_options.h"
 #include "utils.h"
@@ -44,6 +46,8 @@
       options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
       options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
+  jit_options->save_profiling_info_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);;
   return jit_options;
 }
 
@@ -76,6 +80,10 @@
   if (jit->GetCodeCache() == nullptr) {
     return nullptr;
   }
+  jit->offline_profile_info_.reset(nullptr);
+  if (options->GetSaveProfilingInfo()) {
+    jit->offline_profile_info_.reset(new OfflineProfilingInfo());
+  }
   LOG(INFO) << "JIT created with initial_capacity="
       << PrettySize(options->GetCodeCacheInitialCapacity())
       << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity())
@@ -134,11 +142,24 @@
 
 bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
   DCHECK(!method->IsRuntimeMethod());
+  // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
     VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
     return false;
   }
-  return jit_compile_method_(jit_compiler_handle_, method, self);
+
+  // Don't compile the method if we are supposed to be deoptimized.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
+    return false;
+  }
+
+  if (!code_cache_->NotifyCompilationOf(method, self)) {
+    return false;
+  }
+  bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+  code_cache_->DoneCompiling(method, self);
+  return success;
 }
 
 void Jit::CreateThreadPool() {
@@ -152,6 +173,33 @@
   }
 }
 
+void Jit::SaveProfilingInfo(const std::string& filename) {
+  if (offline_profile_info_ == nullptr) {
+    return;
+  }
+  // Note that we can't check the PrimaryOatFile when constructing the offline_profilie_info_
+  // because it becomes known to the Runtime after we create and initialize the JIT.
+  const OatFile* primary_oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile();
+  if (primary_oat_file == nullptr) {
+    LOG(WARNING) << "Couldn't find a primary oat file when trying to save profile info to "
+                 << filename;
+    return;
+  }
+
+  uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs();
+  if (offline_profile_info_->NeedsSaving(last_update_ns)) {
+    VLOG(profiler) << "Iniate save profiling information to: " << filename;
+    std::set<ArtMethod*> methods;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      code_cache_->GetCompiledArtMethods(primary_oat_file, methods);
+    }
+    offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods);
+  } else {
+    VLOG(profiler) << "No need to save profiling information to: " << filename;
+  }
+}
+
 Jit::~Jit() {
   if (dump_info_on_shutdown_) {
     DumpInfo(LOG(INFO));
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index fc76549..630eba3 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -26,6 +26,7 @@
 #include "gc_root.h"
 #include "jni.h"
 #include "object_callbacks.h"
+#include "offline_profiling_info.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -71,6 +72,8 @@
     return instrumentation_cache_.get();
   }
 
+  void SaveProfilingInfo(const std::string& filename);
+
  private:
   Jit();
   bool LoadCompiler(std::string* error_msg);
@@ -90,6 +93,7 @@
   std::unique_ptr<jit::JitCodeCache> code_cache_;
   CompilerCallbacks* compiler_callbacks_;  // Owned by the jit compiler.
 
+  std::unique_ptr<OfflineProfilingInfo> offline_profile_info_;
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
@@ -111,12 +115,18 @@
   bool DumpJitInfoOnShutdown() const {
     return dump_info_on_shutdown_;
   }
+  bool GetSaveProfilingInfo() const {
+    return save_profiling_info_;
+  }
   bool UseJIT() const {
     return use_jit_;
   }
   void SetUseJIT(bool b) {
     use_jit_ = b;
   }
+  void SetSaveProfilingInfo(bool b) {
+    save_profiling_info_ = b;
+  }
 
  private:
   bool use_jit_;
@@ -125,13 +135,15 @@
   size_t compile_threshold_;
   size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
+  bool save_profiling_info_;
 
   JitOptions()
       : use_jit_(false),
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
-        dump_info_on_shutdown_(false) { }
+        dump_info_on_shutdown_(false),
+        save_profiling_info_(false) { }
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index da79109..3342e92 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "art_method-inl.h"
+#include "base/time_utils.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "jit/profiling_info.h"
@@ -109,7 +110,8 @@
       current_capacity_(initial_code_capacity + initial_data_capacity),
       code_end_(initial_code_capacity),
       data_end_(initial_data_capacity),
-      has_done_one_collection_(false) {
+      has_done_one_collection_(false),
+      last_update_time_ns_(0) {
 
   code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/);
   data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/);
@@ -314,6 +316,7 @@
       // code.
       GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
     }
+    last_update_time_ns_ = NanoTime();
     VLOG(jit)
         << "JIT added "
         << PrettyMethod(method) << "@" << method
@@ -533,7 +536,9 @@
       instrumentation->UpdateMethodsCode(it.second, GetQuickToInterpreterBridge());
     }
     for (ProfilingInfo* info : profiling_infos_) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
+      if (!info->IsMethodBeingCompiled()) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+      }
     }
   }
 
@@ -574,12 +579,17 @@
       }
     }
 
-    // Free all profiling info.
-    for (ProfilingInfo* info : profiling_infos_) {
-      DCHECK(info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr);
-      mspace_free(data_mspace_, reinterpret_cast<uint8_t*>(info));
-    }
-    profiling_infos_.clear();
+    void* data_mspace = data_mspace_;
+    // Free all profiling infos of methods that were not being compiled.
+    auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(),
+      [data_mspace] (ProfilingInfo* info) {
+        if (info->GetMethod()->GetProfilingInfo(sizeof(void*)) == nullptr) {
+          mspace_free(data_mspace, reinterpret_cast<uint8_t*>(info));
+          return true;
+        }
+        return false;
+      });
+    profiling_infos_.erase(profiling_kept_end, profiling_infos_.end());
 
     live_bitmap_.reset(nullptr);
     has_done_one_collection_ = true;
@@ -640,7 +650,7 @@
                                                       ArtMethod* method,
                                                       const std::vector<uint32_t>& entries) {
   size_t profile_info_size = RoundUp(
-      sizeof(ProfilingInfo) + sizeof(ProfilingInfo::InlineCache) * entries.size(),
+      sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size(),
       sizeof(void*));
   ScopedThreadSuspension sts(self, kSuspended);
   MutexLock mu(self, lock_);
@@ -677,5 +687,39 @@
   }
 }
 
+void JitCodeCache::GetCompiledArtMethods(const OatFile* oat_file,
+                                         std::set<ArtMethod*>& methods) {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto it : method_code_map_) {
+    if (it.second->GetDexFile()->GetOatDexFile()->GetOatFile() == oat_file) {
+      methods.insert(it.second);
+    }
+  }
+}
+
+uint64_t JitCodeCache::GetLastUpdateTimeNs() {
+  MutexLock mu(Thread::Current(), lock_);
+  return last_update_time_ns_;
+}
+
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
+  if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+    return false;
+  }
+  MutexLock mu(self, lock_);
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  if (info == nullptr || info->IsMethodBeingCompiled()) {
+    return false;
+  }
+  info->SetIsMethodBeingCompiled(true);
+  return true;
+}
+
+void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSED) {
+  ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
+  DCHECK(info->IsMethodBeingCompiled());
+  info->SetIsMethodBeingCompiled(false);
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 13481e0..4032c7b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -66,6 +66,14 @@
   // of methods that got JIT compiled, as we might have collected some.
   size_t NumberOfCompiledCode() REQUIRES(!lock_);
 
+  bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
+  void DoneCompiling(ArtMethod* method, Thread* self)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!lock_);
+
   // Allocate and write code and its metadata to the code cache.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
@@ -139,6 +147,13 @@
 
   void* MoreCore(const void* mspace, intptr_t increment);
 
+  // Adds to `methods` all the compiled ArtMethods which are part of the given `oat_file`.
+  void GetCompiledArtMethods(const OatFile* oat_file, std::set<ArtMethod*>& methods)
+      REQUIRES(!lock_)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
+  uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_);
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -228,6 +243,9 @@
   // Whether a collection has already been done on the current capacity.
   bool has_done_one_collection_ GUARDED_BY(lock_);
 
+  // Last time the the code_cache was updated.
+  uint64_t last_update_time_ns_ GUARDED_BY(lock_);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache);
 };
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
new file mode 100644
index 0000000..4450653
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.cc
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offline_profiling_info.h"
+
+#include <fstream>
+#include <set>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+
+#include "art_method-inl.h"
+#include "base/mutex.h"
+#include "jit/profiling_info.h"
+#include "safe_map.h"
+#include "utils.h"
+
+namespace art {
+
+// An arbitrary value to throttle save requests. Set to 500ms for now.
+static constexpr const uint64_t kMilisecondsToNano = 1000000;
+static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMilisecondsToNano;
+
+bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const {
+  return last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs;
+}
+
+void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename,
+                                             uint64_t last_update_time_ns,
+                                             const std::set<ArtMethod*>& methods) {
+  if (!NeedsSaving(last_update_time_ns)) {
+    VLOG(profiler) << "No need to saved profile info to " << filename;
+    return;
+  }
+
+  if (methods.empty()) {
+    VLOG(profiler) << "No info to save to " << filename;
+    return;
+  }
+
+  DexFileToMethodsMap info;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    for (auto it = methods.begin(); it != methods.end(); it++) {
+      AddMethodInfo(*it, &info);
+    }
+  }
+
+  // This doesn't need locking because we are trying to lock the file for exclusive
+  // access and fail immediately if we can't.
+  if (Serialize(filename, info)) {
+    last_update_time_ns_.StoreRelaxed(last_update_time_ns);
+    VLOG(profiler) << "Successfully saved profile info to "
+                   << filename << " with time stamp: " << last_update_time_ns;
+  }
+}
+
+
+void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) {
+  DCHECK(method != nullptr);
+  const DexFile* dex_file = method->GetDexFile();
+
+  auto info_it = info->find(dex_file);
+  if (info_it == info->end()) {
+    info_it = info->Put(dex_file, std::set<uint32_t>());
+  }
+  info_it->second.insert(method->GetDexMethodIndex());
+}
+
+static int OpenOrCreateFile(const std::string& filename) {
+  // TODO(calin) allow the shared uid of the app to access the file.
+  int fd = open(filename.c_str(),
+                O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC,
+                S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    PLOG(WARNING) << "Failed to open profile file " << filename;
+    return -1;
+  }
+
+  // Lock the file for exclusive access but don't wait if we can't lock it.
+  int err = flock(fd, LOCK_EX | LOCK_NB);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to lock profile file " << filename;
+    return -1;
+  }
+
+  return fd;
+}
+
+static bool CloseDescriptorForFile(int fd, const std::string& filename) {
+  // Now unlock the file, allowing another process in.
+  int err = flock(fd, LOCK_UN);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to unlock profile file " << filename;
+    return false;
+  }
+
+  // Done, close the file.
+  err = ::close(fd);
+  if (err < 0) {
+    PLOG(WARNING) << "Failed to close descriptor for profile file" << filename;
+    return false;
+  }
+
+  return true;
+}
+
+static void WriteToFile(int fd, const std::ostringstream& os) {
+  std::string data(os.str());
+  const char *p = data.c_str();
+  size_t length = data.length();
+  do {
+    int n = ::write(fd, p, length);
+    p += n;
+    length -= n;
+  } while (length > 0);
+}
+
+static constexpr char kFieldSeparator = ',';
+static constexpr char kLineSeparator = '\n';
+
+/**
+ * Serialization format:
+ *    multidex_suffix1,dex_location_checksum1,method_id11,method_id12...
+ *    multidex_suffix2,dex_location_checksum2,method_id21,method_id22...
+ * e.g.
+ *    ,131232145,11,23,454,54               -> this is the first dex file, it has no multidex suffix
+ *    :classes5.dex,218490184,39,13,49,1    -> this is the fifth dex file.
+ **/
+bool OfflineProfilingInfo::Serialize(const std::string& filename,
+                                     const DexFileToMethodsMap& info) const {
+  int fd = OpenOrCreateFile(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  // TODO(calin): Merge with a previous existing profile.
+  // TODO(calin): Profile this and see how much memory it takes. If too much,
+  // write to file directly.
+  std::ostringstream os;
+  for (auto it : info) {
+    const DexFile* dex_file = it.first;
+    const std::set<uint32_t>& method_dex_ids = it.second;
+
+    os << DexFile::GetMultiDexSuffix(dex_file->GetLocation())
+        << kFieldSeparator
+        << dex_file->GetLocationChecksum();
+    for (auto method_it : method_dex_ids) {
+      os << kFieldSeparator << method_it;
+    }
+    os << kLineSeparator;
+  }
+
+  WriteToFile(fd, os);
+
+  return CloseDescriptorForFile(fd, filename);
+}
+}  // namespace art
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
new file mode 100644
index 0000000..e3117eb
--- /dev/null
+++ b/runtime/jit/offline_profiling_info.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
+
+#include <set>
+
+#include "atomic.h"
+#include "dex_file.h"
+#include "safe_map.h"
+
+namespace art {
+
+class ArtMethod;
+
+/**
+ * Profiling information in a format that can be serialized to disk.
+ * It is a serialize-friendly format based on information collected
+ * by the interpreter (ProfileInfo).
+ * Currently it stores only the hot compiled methods.
+ */
+class OfflineProfilingInfo {
+ public:
+  bool NeedsSaving(uint64_t last_update_time_ns) const;
+  void SaveProfilingInfo(const std::string& filename,
+                         uint64_t last_update_time_ns,
+                         const std::set<ArtMethod*>& methods);
+
+ private:
+  // Map identifying the location of the profiled methods.
+  // dex_file_ -> [dex_method_index]+
+  using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>;
+
+  void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+  bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const;
+
+  // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a
+  // singe thread?)
+  Atomic<uint64_t> last_update_time_ns_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2e52b1b..dcb346c 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -54,28 +54,29 @@
     code_ptr += instruction.SizeInCodeUnits();
   }
 
-  // If there is no instruction we are interested in, no need to create a `ProfilingInfo`
-  // object, it will never be filled.
-  if (entries.empty()) {
-    return true;
-  }
+  // We always create a `ProfilingInfo` object, even if there is no instruction we are
+  // interested in. The JIT code cache internally uses it.
 
   // Allocate the `ProfilingInfo` object int the JIT's data space.
   jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
   return code_cache->AddProfilingInfo(self, method, entries, retry_allocation) != nullptr;
 }
 
-void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+InlineCache* ProfilingInfo::GetInlineCache(uint32_t dex_pc) {
   InlineCache* cache = nullptr;
   // TODO: binary search if array is too long.
   for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-    if (cache_[i].dex_pc == dex_pc) {
+    if (cache_[i].dex_pc_ == dex_pc) {
       cache = &cache_[i];
       break;
     }
   }
   DCHECK(cache != nullptr);
+  return cache;
+}
 
+void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = GetInlineCache(dex_pc);
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b13a315..ddaf02f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -25,6 +25,7 @@
 namespace art {
 
 class ArtMethod;
+class ProfilingInfo;
 
 namespace jit {
 class JitCodeCache;
@@ -34,6 +35,49 @@
 class Class;
 }
 
+// Structure to store the classes seen at runtime for a specific instruction.
+// Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+class InlineCache {
+ public:
+  bool IsMonomorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 2);
+    return !classes_[0].IsNull() && classes_[1].IsNull();
+  }
+
+  bool IsMegamorphic() const {
+    for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+      if (classes_[i].IsNull()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  mirror::Class* GetMonomorphicType() const SHARED_REQUIRES(Locks::mutator_lock_) {
+    // Note that we cannot ensure the inline cache is actually monomorphic
+    // at this point, as other threads may have updated it.
+    return classes_[0].Read();
+  }
+
+  bool IsUnitialized() const {
+    return classes_[0].IsNull();
+  }
+
+  bool IsPolymorphic() const {
+    DCHECK_GE(kIndividualCacheSize, 3);
+    return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+  }
+
+ private:
+  static constexpr uint16_t kIndividualCacheSize = 5;
+  uint32_t dex_pc_;
+  GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+
+  friend class ProfilingInfo;
+
+  DISALLOW_COPY_AND_ASSIGN(InlineCache);
+};
+
 /**
  * Profiling info for a method, created and filled by the interpreter once the
  * method is warm, and used by the compiler to drive optimizations.
@@ -67,44 +111,24 @@
     return method_;
   }
 
+  InlineCache* GetInlineCache(uint32_t dex_pc);
+
+  bool IsMethodBeingCompiled() const {
+    return is_method_being_compiled_;
+  }
+
+  void SetIsMethodBeingCompiled(bool value) {
+    is_method_being_compiled_ = value;
+  }
+
  private:
-  // Structure to store the classes seen at runtime for a specific instruction.
-  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
-  struct InlineCache {
-    bool IsMonomorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 2);
-      return !classes_[0].IsNull() && classes_[1].IsNull();
-    }
-
-    bool IsMegamorphic() const {
-      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
-        if (classes_[i].IsNull()) {
-          return false;
-        }
-      }
-      return true;
-    }
-
-    bool IsUnitialized() const {
-      return classes_[0].IsNull();
-    }
-
-    bool IsPolymorphic() const {
-      DCHECK_GE(kIndividualCacheSize, 3);
-      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
-    }
-
-    static constexpr uint16_t kIndividualCacheSize = 5;
-    uint32_t dex_pc;
-    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
-  };
-
   ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
       : number_of_inline_caches_(entries.size()),
-        method_(method) {
+        method_(method),
+        is_method_being_compiled_(false) {
     memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc = entries[i];
+      cache_[i].dex_pc_ = entries[i];
     }
   }
 
@@ -114,6 +138,11 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Whether the ArtMethod is currently being compiled. This flag
+  // is implicitly guarded by the JIT code cache lock.
+  // TODO: Make the JIT code cache lock global.
+  bool is_method_being_compiled_;
+
   // Dynamically allocated array of size `number_of_inline_caches_`.
   InlineCache cache_[0];
 
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index dab1040..aa25f67 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -105,9 +105,32 @@
   stacked_local_ref_cookies.pop_back();
 }
 
-Offset JNIEnvExt::SegmentStateOffset() {
-  return Offset(OFFSETOF_MEMBER(JNIEnvExt, locals) +
-                IndirectReferenceTable::SegmentStateOffset().Int32Value());
+// Note: the offset code is brittle, as we can't use OFFSETOF_MEMBER or offsetof easily. Thus, there
+//       are tests in jni_internal_test to match the results against the actual values.
+
+// This is encoding the knowledge of the structure and layout of JNIEnv fields.
+static size_t JNIEnvSize(size_t pointer_size) {
+  // A single pointer.
+  return pointer_size;
+}
+
+Offset JNIEnvExt::SegmentStateOffset(size_t pointer_size) {
+  size_t locals_offset = JNIEnvSize(pointer_size) +
+                         2 * pointer_size +          // Thread* self + JavaVMExt* vm.
+                         4 +                         // local_ref_cookie.
+                         (pointer_size - 4);         // Padding.
+  size_t irt_segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(pointer_size).Int32Value();
+  return Offset(locals_offset + irt_segment_state_offset);
+}
+
+Offset JNIEnvExt::LocalRefCookieOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size) +
+                2 * pointer_size);          // Thread* self + JavaVMExt* vm
+}
+
+Offset JNIEnvExt::SelfOffset(size_t pointer_size) {
+  return Offset(JNIEnvSize(pointer_size));
 }
 
 // Use some defining part of the caller's frame as the identifying mark for the JNI segment.
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 3828ff0..2f8decf 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -50,15 +50,9 @@
   T AddLocalReference(mirror::Object* obj)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
-  static Offset SegmentStateOffset();
-
-  static Offset LocalRefCookieOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie));
-  }
-
-  static Offset SelfOffset() {
-    return Offset(OFFSETOF_MEMBER(JNIEnvExt, self));
-  }
+  static Offset SegmentStateOffset(size_t pointer_size);
+  static Offset LocalRefCookieOffset(size_t pointer_size);
+  static Offset SelfOffset(size_t pointer_size);
 
   jobject NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_);
   void DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 415109f..5e3fa19 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -1689,7 +1689,8 @@
     } else {
       CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf);
       const jchar* chars = s->GetValue();
-      ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
+      size_t bytes = CountUtf8Bytes(chars + start, length);
+      ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length);
     }
   }
 
@@ -1772,7 +1773,7 @@
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != nullptr);  // bionic aborts anyway.
     const uint16_t* chars = s->GetValue();
-    ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength());
+    ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength());
     bytes[byte_count] = '\0';
     return bytes;
   }
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 649df5f..b41d16b 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -18,7 +18,9 @@
 
 #include "art_method-inl.h"
 #include "common_compiler_test.h"
+#include "indirect_reference_table.h"
 #include "java_vm_ext.h"
+#include "jni_env_ext.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
@@ -2210,4 +2212,92 @@
   check_jni_abort_catcher.Check("Still holding a locked object on JNI end");
 }
 
+static bool IsLocked(JNIEnv* env, jobject jobj) {
+  ScopedObjectAccess soa(env);
+  LockWord lock_word = soa.Decode<mirror::Object*>(jobj)->GetLockWord(true);
+  switch (lock_word.GetState()) {
+    case LockWord::kHashCode:
+    case LockWord::kUnlocked:
+      return false;
+    case LockWord::kThinLocked:
+      return true;
+    case LockWord::kFatLocked:
+      return lock_word.FatLockMonitor()->IsLocked();
+    default: {
+      LOG(FATAL) << "Invalid monitor state " << lock_word.GetState();
+      UNREACHABLE();
+    }
+  }
+}
+
+TEST_F(JniInternalTest, DetachThreadUnlockJNIMonitors) {
+  // We need to lock an object, detach, reattach, and check the locks.
+  //
+  // As re-attaching will create a different thread, we need to use a global
+  // ref to keep the object around.
+
+  // Create an object to torture.
+  jobject global_ref;
+  {
+    jclass object_class = env_->FindClass("java/lang/Object");
+    ASSERT_NE(object_class, nullptr);
+    jobject object = env_->AllocObject(object_class);
+    ASSERT_NE(object, nullptr);
+    global_ref = env_->NewGlobalRef(object);
+  }
+
+  // Lock it.
+  env_->MonitorEnter(global_ref);
+  ASSERT_TRUE(IsLocked(env_, global_ref));
+
+  // Detach and re-attach.
+  jint detach_result = vm_->DetachCurrentThread();
+  ASSERT_EQ(detach_result, JNI_OK);
+  jint attach_result = vm_->AttachCurrentThread(&env_, nullptr);
+  ASSERT_EQ(attach_result, JNI_OK);
+
+  // Look at the global ref, check whether it's still locked.
+  ASSERT_FALSE(IsLocked(env_, global_ref));
+
+  // Delete the global ref.
+  env_->DeleteGlobalRef(global_ref);
+}
+
+// Test the offset computation of IndirectReferenceTable offsets. b/26071368.
+TEST_F(JniInternalTest, IndirectReferenceTableOffsets) {
+  // The segment_state_ field is private, and we want to avoid friend declaration. So we'll check
+  // by modifying memory.
+  // The parameters don't really matter here.
+  IndirectReferenceTable irt(5, 5, IndirectRefKind::kGlobal, true);
+  uint32_t old_state = irt.GetSegmentState();
+
+  // Write some new state directly. We invert parts of old_state to ensure a new value.
+  uint32_t new_state = old_state ^ 0x07705005;
+  ASSERT_NE(old_state, new_state);
+
+  uint8_t* base = reinterpret_cast<uint8_t*>(&irt);
+  int32_t segment_state_offset =
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
+  *reinterpret_cast<uint32_t*>(base + segment_state_offset) = new_state;
+
+  // Read and compare.
+  EXPECT_EQ(new_state, irt.GetSegmentState());
+}
+
+// Test the offset computation of JNIEnvExt offsets. b/26071368.
+TEST_F(JniInternalTest, JNIEnvExtOffsets) {
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, local_ref_cookie),
+            JNIEnvExt::LocalRefCookieOffset(sizeof(void*)).Int32Value());
+
+  EXPECT_EQ(OFFSETOF_MEMBER(JNIEnvExt, self), JNIEnvExt::SelfOffset(sizeof(void*)).Int32Value());
+
+  // segment_state_ is private in the IndirectReferenceTable. So this test isn't as good as we'd
+  // hope it to be.
+  int32_t segment_state_now =
+      OFFSETOF_MEMBER(JNIEnvExt, locals) +
+      IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
+  int32_t segment_state_computed = JNIEnvExt::SegmentStateOffset(sizeof(void*)).Int32Value();
+  EXPECT_EQ(segment_state_now, segment_state_computed);
+}
+
 }  // namespace art
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 4b2ac20..e133847 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -154,8 +154,10 @@
   }
 
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (map.get() == nullptr) {
-    *error_msg = StringPrintf("Failed to build process map");
+  if (map == nullptr) {
+    if (error_msg != nullptr) {
+      *error_msg = StringPrintf("Failed to build process map");
+    }
     return false;
   }
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
@@ -164,9 +166,11 @@
       return true;
     }
   }
-  PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
-  *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
-                            "any existing map. See process maps in the log.", begin, end);
+  if (error_msg != nullptr) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::ERROR);
+    *error_msg = StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " does not overlap "
+                              "any existing map. See process maps in the log.", begin, end);
+  }
   return false;
 }
 
@@ -239,15 +243,16 @@
   std::string error_detail;
   CheckNonOverlapping(expected, limit, &error_detail);
 
-  std::ostringstream os;
-  os <<  StringPrintf("Failed to mmap at expected address, mapped at "
-                      "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
-                      actual, expected);
-  if (!error_detail.empty()) {
-    os << " : " << error_detail;
+  if (error_msg != nullptr) {
+    std::ostringstream os;
+    os <<  StringPrintf("Failed to mmap at expected address, mapped at "
+                        "0x%08" PRIxPTR " instead of 0x%08" PRIxPTR,
+                        actual, expected);
+    if (!error_detail.empty()) {
+      os << " : " << error_detail;
+    }
+    *error_msg = os.str();
   }
-
-  *error_msg = os.str();
   return false;
 }
 
@@ -379,7 +384,8 @@
     // Only use this if you actually made the page reservation yourself.
     CHECK(expected_ptr != nullptr);
 
-    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg)) << *error_msg;
+    DCHECK(ContainedWithinExistingMap(expected_ptr, byte_count, error_msg))
+        << ((error_msg != nullptr) ? *error_msg : std::string());
     flags |= MAP_FIXED;
   } else {
     CHECK_EQ(0, flags & MAP_FIXED);
@@ -414,15 +420,17 @@
                                                            page_aligned_offset,
                                                            low_4gb));
   if (actual == MAP_FAILED) {
-    auto saved_errno = errno;
+    if (error_msg != nullptr) {
+      auto saved_errno = errno;
 
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+      PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
 
-    *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
-                              ") of file '%s' failed: %s. See process maps in the log.",
-                              page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
-                              static_cast<int64_t>(page_aligned_offset), filename,
-                              strerror(saved_errno));
+      *error_msg = StringPrintf("mmap(%p, %zd, 0x%x, 0x%x, %d, %" PRId64
+                                ") of file '%s' failed: %s. See process maps in the log.",
+                                page_aligned_expected, page_aligned_byte_count, prot, flags, fd,
+                                static_cast<int64_t>(page_aligned_offset), filename,
+                                strerror(saved_errno));
+    }
     return nullptr;
   }
   std::ostringstream check_map_request_error_msg;
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index a67a925..efce09a 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -99,11 +99,12 @@
                             error_msg);
   }
 
-  // Map part of a file, taking care of non-page aligned offsets.  The
-  // "start" offset is absolute, not relative. This version allows
-  // requesting a specific address for the base of the
-  // mapping. "reuse" allows us to create a view into an existing
-  // mapping where we do not take ownership of the memory.
+  // Map part of a file, taking care of non-page aligned offsets.  The "start" offset is absolute,
+  // not relative. This version allows requesting a specific address for the base of the mapping.
+  // "reuse" allows us to create a view into an existing mapping where we do not take ownership of
+  // the memory. If error_msg is null then we do not print /proc/maps to the log if
+  // MapFileAtAddress fails. This helps improve performance of the fail case since reading and
+  // printing /proc/maps takes several milliseconds in the worst case.
   //
   // On success, returns returns a MemMap instance.  On failure, returns null.
   static MemMap* MapFileAtAddress(uint8_t* addr,
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index be869d4..33aca03 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -109,12 +109,17 @@
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
   DCHECK(utf != nullptr);
-  size_t char_count = CountModifiedUtf8Chars(utf);
-  return AllocFromModifiedUtf8(self, char_count, utf);
+  size_t byte_count = strlen(utf);
+  size_t char_count = CountModifiedUtf8Chars(utf, byte_count);
+  return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
+}
+
+String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) {
+  return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in) {
+                                      const char* utf8_data_in, int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   SetStringCountVisitor visitor(utf16_length);
   String* string = Alloc<true>(self, utf16_length, allocator_type, visitor);
@@ -122,7 +127,7 @@
     return nullptr;
   }
   uint16_t* utf16_data_out = string->GetValue();
-  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in);
+  ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
   return string;
 }
 
@@ -217,7 +222,7 @@
   const uint16_t* chars = GetValue();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
-  ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength());
+  ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength());
   return result;
 }
 
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 80ebd2c..e2cfb8d 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -116,6 +116,10 @@
   static String* AllocFromModifiedUtf8(Thread* self, const char* utf)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
+  static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
+                                       const char* utf8_data_in, int32_t utf8_length)
+      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+
   static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in)
       SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 4c5dc3a..b49d68f 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -562,17 +562,20 @@
 
 /*
  * This is called by the framework when it knows the application directory and
- * process name.  We use this information to start up the sampling profiler for
- * for ART.
+ * process name.
  */
-static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName,
-                                      jstring appDir ATTRIBUTE_UNUSED,
+static void VMRuntime_registerAppInfo(JNIEnv* env,
+                                      jclass clazz ATTRIBUTE_UNUSED,
+                                      jstring pkgName,
+                                      jstring appDir,
                                       jstring procName ATTRIBUTE_UNUSED) {
-  const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
-  std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars);
+  const char* appDirChars = env->GetStringUTFChars(appDir, nullptr);
+  const char* pkgNameChars = env->GetStringUTFChars(pkgName, nullptr);
+  std::string profileFile = StringPrintf("%s/code_cache/%s.prof", appDirChars, pkgNameChars);
 
-  Runtime::Current()->StartProfiler(profileFile.c_str());
+  Runtime::Current()->SetJitProfilingFilename(profileFile.c_str());
 
+  env->ReleaseStringUTFChars(appDir, appDirChars);
   env->ReleaseStringUTFChars(pkgName, pkgNameChars);
 }
 
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 680f4ac..83e594b 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -52,10 +52,10 @@
 
 namespace art {
 
-// Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader.
+// Whether OatFile::Open will try dlopen. Fallback is our own ELF loader.
 static constexpr bool kUseDlopen = true;
 
-// Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against
+// Whether OatFile::Open will try dlopen on the host. On the host we're not linking against
 // bionic, so cannot take advantage of the support for changed semantics (loading the same soname
 // multiple times). However, if/when we switch the above, we likely want to switch this, too,
 // to get test coverage of the code paths.
@@ -64,348 +64,140 @@
 // For debugging, Open will print DlOpen error message if set to true.
 static constexpr bool kPrintDlOpenErrorMessage = false;
 
-std::string OatFile::ResolveRelativeEncodedDexLocation(
-      const char* abs_dex_location, const std::string& rel_dex_location) {
-  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
-    // Strip :classes<N>.dex used for secondary multidex files.
-    std::string base = DexFile::GetBaseLocation(rel_dex_location);
-    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+// Note for OatFileBase and descendents:
+//
+// These are used in OatFile::Open to try all our loaders.
+//
+// The process is simple:
+//
+// 1) Allocate an instance through the standard constructor (location, executable)
+// 2) Load() to try to open the file.
+// 3) ComputeFields() to populate the OatFile fields like begin_, using FindDynamicSymbolAddress.
+// 4) PreSetup() for any steps that should be done before the final setup.
+// 5) Setup() to complete the procedure.
 
-    // Check if the base is a suffix of the provided abs_dex_location.
-    std::string target_suffix = "/" + base;
-    std::string abs_location(abs_dex_location);
-    if (abs_location.size() > target_suffix.size()) {
-      size_t pos = abs_location.size() - target_suffix.size();
-      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
-        return abs_location + multidex_suffix;
-      }
-    }
-  }
-  return rel_dex_location;
-}
+class OatFileBase : public OatFile {
+ public:
+  virtual ~OatFileBase() {}
 
-void OatFile::CheckLocation(const std::string& location) {
-  CHECK(!location.empty());
-}
-
-OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+  template <typename kOatFileBaseSubType>
+  static OatFileBase* OpenOatFile(const std::string& elf_filename,
                                   const std::string& location,
+                                  uint8_t* requested_base,
+                                  uint8_t* oat_file_begin,
+                                  bool writable,
+                                  bool executable,
                                   const char* abs_dex_location,
-                                  std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
-  oat_file->elf_file_.reset(elf_file);
-  uint64_t offset, size;
-  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
-  CHECK(has_section);
-  oat_file->begin_ = elf_file->Begin() + offset;
-  oat_file->end_ = elf_file->Begin() + size + offset;
-  // Ignore the optional .bss section when opening non-executable.
-  return oat_file->Setup(abs_dex_location, error_msg) ? oat_file.release() : nullptr;
-}
+                                  std::string* error_msg);
 
-OatFile* OatFile::Open(const std::string& filename,
-                       const std::string& location,
-                       uint8_t* requested_base,
-                       uint8_t* oat_file_begin,
-                       bool executable,
-                       const char* abs_dex_location,
-                       std::string* error_msg) {
-  CHECK(!filename.empty()) << location;
-  CheckLocation(location);
-  std::unique_ptr<OatFile> ret;
+ protected:
+  OatFileBase(const std::string& filename, bool executable) : OatFile(filename, executable) {}
 
-  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
-  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
-  //       !executable is a sign that we may want to patch), which may not be allowed for
-  //       various reasons.
-  // dlopen always returns the same library if it is already opened on the host. For this reason
-  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
-  // the same library loaded multiple times at different addresses is required for class unloading
-  // and for having dex caches arrays in the .bss section.
-  Runtime* const runtime = Runtime::Current();
-  OatFileManager* const manager = (runtime != nullptr) ? &runtime->GetOatFileManager() : nullptr;
-  if (kUseDlopen && executable) {
-    bool success = kIsTargetBuild;
-    bool reserved_location = false;
-      // Manager may be null if we are running without a runtime.
-    if (!success && kUseDlopenOnHost && manager != nullptr) {
-      // RegisterOatFileLocation returns false if we are not the first caller to register that
-      // location.
-      reserved_location = manager->RegisterOatFileLocation(location);
-      success = reserved_location;
-    }
-    if (success) {
-      // Try to use dlopen. This may fail for various reasons, outlined below. We try dlopen, as
-      // this will register the oat file with the linker and allows libunwind to find our info.
-      ret.reset(OpenDlopen(filename, location, requested_base, abs_dex_location, error_msg));
-      if (reserved_location) {
-        manager->UnRegisterOatFileLocation(location);
-      }
-      if (ret != nullptr) {
-        return ret.release();
-      }
-      if (kPrintDlOpenErrorMessage) {
-        LOG(ERROR) << "Failed to dlopen: " << *error_msg;
-      }
-    }
+  virtual const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                                  std::string* error_msg) const = 0;
+
+  virtual bool Load(const std::string& elf_filename,
+                    uint8_t* oat_file_begin,
+                    bool writable,
+                    bool executable,
+                    std::string* error_msg) = 0;
+
+  bool ComputeFields(uint8_t* requested_base,
+                     const std::string& file_path,
+                     std::string* error_msg);
+
+  virtual void PreSetup(const std::string& elf_filename) = 0;
+
+  bool Setup(const char* abs_dex_location, std::string* error_msg);
+
+  // Setters exposed for ElfOatFile.
+
+  void SetBegin(const uint8_t* begin) {
+    begin_ = begin;
   }
 
-  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
-  //
-  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  //
-  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
-  //
-  //
-  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
-  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
-  std::unique_ptr<File> file(OS::OpenFileForReading(filename.c_str()));
-  if (file == nullptr) {
-    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+  void SetEnd(const uint8_t* end) {
+    end_ = end;
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(OatFileBase);
+};
+
+template <typename kOatFileBaseSubType>
+OatFileBase* OatFileBase::OpenOatFile(const std::string& elf_filename,
+                                      const std::string& location,
+                                      uint8_t* requested_base,
+                                      uint8_t* oat_file_begin,
+                                      bool writable,
+                                      bool executable,
+                                      const char* abs_dex_location,
+                                      std::string* error_msg) {
+  std::unique_ptr<OatFileBase> ret(new kOatFileBaseSubType(location, executable));
+  if (!ret->Load(elf_filename,
+                 oat_file_begin,
+                 writable,
+                 executable,
+                 error_msg)) {
     return nullptr;
   }
-  ret.reset(OpenElfFile(file.get(), location, requested_base, oat_file_begin, false, executable,
-                        abs_dex_location, error_msg));
 
-  // It would be nice to unlink here. But we might have opened the file created by the
-  // ScopedLock, which we better not delete to avoid races. TODO: Investigate how to fix the API
-  // to allow removal when we know the ELF must be borked.
+  if (!ret->ComputeFields(requested_base, elf_filename, error_msg)) {
+    return nullptr;
+  }
+
+  ret->PreSetup(elf_filename);
+
+  if (!ret->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
   return ret.release();
 }
 
-OatFile* OatFile::OpenWritable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, true, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenReadable(File* file, const std::string& location,
-                               const char* abs_dex_location,
-                               std::string* error_msg) {
-  CheckLocation(location);
-  return OpenElfFile(file, location, nullptr, nullptr, false, false, abs_dex_location, error_msg);
-}
-
-OatFile* OatFile::OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, true));
-  bool success = oat_file->Dlopen(elf_filename, requested_base, abs_dex_location, error_msg);
-  if (!success) {
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile* OatFile::OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg) {
-  std::unique_ptr<OatFile> oat_file(new OatFile(location, executable));
-  bool success = oat_file->ElfFileOpen(file, requested_base, oat_file_begin, writable, executable,
-                                       abs_dex_location, error_msg);
-  if (!success) {
-    CHECK(!error_msg->empty());
-    return nullptr;
-  }
-  return oat_file.release();
-}
-
-OatFile::OatFile(const std::string& location, bool is_executable)
-    : location_(location), begin_(nullptr), end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr),
-      is_executable_(is_executable), dlopen_handle_(nullptr),
-      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
-  CHECK(!location_.empty());
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().RegisterOatFileLocation(location);
-  }
-}
-
-OatFile::~OatFile() {
-  STLDeleteElements(&oat_dex_files_storage_);
-  if (dlopen_handle_ != nullptr) {
-    dlclose(dlopen_handle_);
-  }
-  Runtime* const runtime = Runtime::Current();
-  if (runtime != nullptr && !runtime->IsAotCompiler()) {
-    runtime->GetOatFileManager().UnRegisterOatFileLocation(location_);
-  }
-}
-
-bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-                     const char* abs_dex_location, std::string* error_msg) {
-#ifdef __APPLE__
-  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
-  // but let's fallback to the custom loading code for the time being.
-  UNUSED(elf_filename, requested_base, abs_dex_location, error_msg);
-  return false;
-#else
-  {
-    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
-    if (absolute_path == nullptr) {
-      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
-      return false;
-    }
-#ifdef __ANDROID__
-    android_dlextinfo extinfo;
-    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR;
-    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
-#else
-    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
-#endif
-  }
-  if (dlopen_handle_ == nullptr) {
-    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
-    return false;
-  }
-  begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatdata"));
+bool OatFileBase::ComputeFields(uint8_t* requested_base,
+                                const std::string& file_path,
+                                std::string* error_msg) {
+  std::string symbol_error_msg;
+  begin_ = FindDynamicSymbolAddress("oatdata", &symbol_error_msg);
   if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   if (requested_base != nullptr && begin_ != requested_base) {
     PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
     *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p, %s. See process maps in the log.",
-                              begin_, requested_base, elf_filename.c_str());
+        "oatdata=%p != expected=%p. See process maps in the log.",
+        begin_, requested_base);
     return false;
   }
-  end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatlastword"));
+  end_ = FindDynamicSymbolAddress("oatlastword", &symbol_error_msg);
   if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s': %s", elf_filename.c_str(),
-                              dlerror());
+    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s' %s",
+                              file_path.c_str(),
+                              symbol_error_msg.c_str());
     return false;
   }
   // Readjust to be non-inclusive upper bound.
   end_ += sizeof(uint32_t);
 
-  bss_begin_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbss"));
+  bss_begin_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbss", &symbol_error_msg));
   if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
+    // No .bss section.
     bss_end_ = nullptr;
-    dlerror();
   } else {
-    bss_end_ = reinterpret_cast<uint8_t*>(dlsym(dlopen_handle_, "oatbsslastword"));
+    bss_end_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbsslastword", &symbol_error_msg));
     if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                elf_filename.c_str());
+      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'", file_path.c_str());
       return false;
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
   }
 
-  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
-  struct dl_iterate_context {
-    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
-      auto* context = reinterpret_cast<dl_iterate_context*>(data);
-      // See whether this callback corresponds to the file which we have just loaded.
-      bool contains_begin = false;
-      for (int i = 0; i < info->dlpi_phnum; i++) {
-        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                      info->dlpi_phdr[i].p_vaddr);
-          size_t memsz = info->dlpi_phdr[i].p_memsz;
-          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
-            contains_begin = true;
-            break;
-          }
-        }
-      }
-      // Add dummy mmaps for this file.
-      if (contains_begin) {
-        for (int i = 0; i < info->dlpi_phnum; i++) {
-          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
-            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
-                                                        info->dlpi_phdr[i].p_vaddr);
-            size_t memsz = info->dlpi_phdr[i].p_memsz;
-            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
-            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
-          }
-        }
-        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
-      }
-      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
-    }
-    const uint8_t* const begin_;
-    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
-  } context = { begin_, &dlopen_mmaps_ };
-
-  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
-  }
-
-  return Setup(abs_dex_location, error_msg);
-#endif  // __APPLE__
-}
-
-bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin,
-                          bool writable, bool executable,
-                          const char* abs_dex_location,
-                          std::string* error_msg) {
-  // TODO: rename requested_base to oat_data_begin
-  elf_file_.reset(ElfFile::Open(file, writable, /*program_header_only*/true, error_msg,
-                                oat_file_begin));
-  if (elf_file_ == nullptr) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  bool loaded = elf_file_->Load(executable, error_msg);
-  if (!loaded) {
-    DCHECK(!error_msg->empty());
-    return false;
-  }
-  begin_ = elf_file_->FindDynamicSymbolAddress("oatdata");
-  if (begin_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatdata symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  if (requested_base != nullptr && begin_ != requested_base) {
-    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
-    *error_msg = StringPrintf("Failed to find oatdata symbol at expected address: "
-                              "oatdata=%p != expected=%p. See process maps in the log.",
-                              begin_, requested_base);
-    return false;
-  }
-  end_ = elf_file_->FindDynamicSymbolAddress("oatlastword");
-  if (end_ == nullptr) {
-    *error_msg = StringPrintf("Failed to find oatlastword symbol in '%s'", file->GetPath().c_str());
-    return false;
-  }
-  // Readjust to be non-inclusive upper bound.
-  end_ += sizeof(uint32_t);
-
-  bss_begin_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbss"));
-  if (bss_begin_ == nullptr) {
-    // No .bss section. Clear dlerror().
-    bss_end_ = nullptr;
-    dlerror();
-  } else {
-    bss_end_ = const_cast<uint8_t*>(elf_file_->FindDynamicSymbolAddress("oatbsslastword"));
-    if (bss_end_ == nullptr) {
-      *error_msg = StringPrintf("Failed to find oatbasslastword symbol in '%s'",
-                                file->GetPath().c_str());
-      return false;
-    }
-    // Readjust to be non-inclusive upper bound.
-    bss_end_ += sizeof(uint32_t);
-  }
-
-  return Setup(abs_dex_location, error_msg);
+  return true;
 }
 
 // Read an unaligned entry from the OatDexFile data in OatFile and advance the read
@@ -428,7 +220,7 @@
   return true;
 }
 
-bool OatFile::Setup(const char* abs_dex_location, std::string* error_msg) {
+bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) {
   if (!GetOatHeader().IsValid()) {
     std::string cause = GetOatHeader().GetValidationErrorMessage();
     *error_msg = StringPrintf("Invalid oat header for '%s': %s",
@@ -630,6 +422,486 @@
   return true;
 }
 
+////////////////////////
+// OatFile via dlopen //
+////////////////////////
+
+static bool RegisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      return runtime->GetOatFileManager().RegisterOatFileLocation(location);
+    }
+    return false;
+  }
+  return true;
+}
+
+static void UnregisterOatFileLocation(const std::string& location) {
+  if (!kIsTargetBuild) {
+    Runtime* const runtime = Runtime::Current();
+    if (runtime != nullptr && !runtime->IsAotCompiler()) {
+      runtime->GetOatFileManager().UnRegisterOatFileLocation(location);
+    }
+  }
+}
+
+class DlOpenOatFile FINAL : public OatFileBase {
+ public:
+  DlOpenOatFile(const std::string& filename, bool executable)
+      : OatFileBase(filename, executable),
+        dlopen_handle_(nullptr),
+        first_oat_(RegisterOatFileLocation(filename)) {
+  }
+
+  ~DlOpenOatFile() {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    UnregisterOatFileLocation(GetLocation());
+  }
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr =
+        reinterpret_cast<const uint8_t*>(dlsym(dlopen_handle_, symbol_name.c_str()));
+    if (ptr == nullptr) {
+      *error_msg = dlerror();
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions.
+  void PreSetup(const std::string& elf_filename) OVERRIDE;
+
+ private:
+  bool Dlopen(const std::string& elf_filename,
+              uint8_t* oat_file_begin,
+              std::string* error_msg);
+
+  // dlopen handle during runtime.
+  void* dlopen_handle_;  // TODO: Unique_ptr with custom deleter.
+
+  // Dummy memory map objects corresponding to the regions mapped by dlopen.
+  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
+
+  // Track the registration status (= was this the first oat file) for the location.
+  const bool first_oat_;
+
+  DISALLOW_COPY_AND_ASSIGN(DlOpenOatFile);
+};
+
+bool DlOpenOatFile::Load(const std::string& elf_filename,
+                         uint8_t* oat_file_begin,
+                         bool writable,
+                         bool executable,
+                         std::string* error_msg) {
+  // Use dlopen only when flagged to do so, and when it's OK to load things executable.
+  // TODO: Also try when not executable? The issue here could be re-mapping as writable (as
+  //       !executable is a sign that we may want to patch), which may not be allowed for
+  //       various reasons.
+  if (!kUseDlopen) {
+    *error_msg = "DlOpen is disabled.";
+    return false;
+  }
+  if (writable) {
+    *error_msg = "DlOpen does not support writable loading.";
+    return false;
+  }
+  if (!executable) {
+    *error_msg = "DlOpen does not support non-executable loading.";
+    return false;
+  }
+
+  // dlopen always returns the same library if it is already opened on the host. For this reason
+  // we only use dlopen if we are the target or we do not already have the dex file opened. Having
+  // the same library loaded multiple times at different addresses is required for class unloading
+  // and for having dex caches arrays in the .bss section.
+  if (!kIsTargetBuild) {
+    if (!kUseDlopenOnHost) {
+      *error_msg = "DlOpen disabled for host.";
+      return false;
+    }
+    // For RAII, tracking multiple loads is done in the constructor and destructor. The result is
+    // stored in the first_oat_ flag.
+    if (!first_oat_) {
+      *error_msg = "Loading oat files multiple times with dlopen not supported on host.";
+      return false;
+    }
+  }
+
+  bool success = Dlopen(elf_filename, oat_file_begin, error_msg);
+  DCHECK(dlopen_handle_ != nullptr || !success);
+
+  return success;
+}
+
+bool DlOpenOatFile::Dlopen(const std::string& elf_filename,
+                           uint8_t* oat_file_begin,
+                           std::string* error_msg) {
+#ifdef __APPLE__
+  // The dl_iterate_phdr syscall is missing.  There is similar API on OSX,
+  // but let's fallback to the custom loading code for the time being.
+  UNUSED(elf_filename, oat_file_begin);
+  *error_msg = "Dlopen unsupported on Mac.";
+  return false;
+#else
+  {
+    UniqueCPtr<char> absolute_path(realpath(elf_filename.c_str(), nullptr));
+    if (absolute_path == nullptr) {
+      *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str());
+      return false;
+    }
+#ifdef __ANDROID__
+    android_dlextinfo extinfo;
+    extinfo.flags = ANDROID_DLEXT_FORCE_LOAD |                  // Force-load, don't reuse handle
+                                                                //   (open oat files multiple
+                                                                //    times).
+                    ANDROID_DLEXT_FORCE_FIXED_VADDR;            // Take a non-zero vaddr as absolute
+                                                                //   (non-pic boot image).
+    if (oat_file_begin != nullptr) {                            //
+      extinfo.flags |= ANDROID_DLEXT_LOAD_AT_FIXED_ADDRESS;     // Use the requested addr if
+      extinfo.reserved_addr = oat_file_begin;                   // vaddr = 0.
+    }                                                           //   (pic boot image).
+    dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo);
+#else
+    dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW);
+    UNUSED(oat_file_begin);
+#endif
+  }
+  if (dlopen_handle_ == nullptr) {
+    *error_msg = StringPrintf("Failed to dlopen '%s': %s", elf_filename.c_str(), dlerror());
+    return false;
+  }
+  return true;
+#endif
+}
+
+void DlOpenOatFile::PreSetup(const std::string& elf_filename) {
+#ifdef __APPLE__
+  UNUSED(elf_filename);
+  LOG(FATAL) << "Should not reach here.";
+  UNREACHABLE();
+#else
+  struct dl_iterate_context {
+    static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) {
+      auto* context = reinterpret_cast<dl_iterate_context*>(data);
+      // See whether this callback corresponds to the file which we have just loaded.
+      bool contains_begin = false;
+      for (int i = 0; i < info->dlpi_phnum; i++) {
+        if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+          uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+              info->dlpi_phdr[i].p_vaddr);
+          size_t memsz = info->dlpi_phdr[i].p_memsz;
+          if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) {
+            contains_begin = true;
+            break;
+          }
+        }
+      }
+      // Add dummy mmaps for this file.
+      if (contains_begin) {
+        for (int i = 0; i < info->dlpi_phnum; i++) {
+          if (info->dlpi_phdr[i].p_type == PT_LOAD) {
+            uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr +
+                info->dlpi_phdr[i].p_vaddr);
+            size_t memsz = info->dlpi_phdr[i].p_memsz;
+            MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz);
+            context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap));
+          }
+        }
+        return 1;  // Stop iteration and return 1 from dl_iterate_phdr.
+      }
+      return 0;  // Continue iteration and return 0 from dl_iterate_phdr when finished.
+    }
+    const uint8_t* const begin_;
+    std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_;
+  } context = { Begin(), &dlopen_mmaps_ };
+
+  if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) {
+    PrintFileToLog("/proc/self/maps", LogSeverity::WARNING);
+    LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps.";
+  }
+#endif
+}
+
+////////////////////////////////////////////////
+// OatFile via our own ElfFile implementation //
+////////////////////////////////////////////////
+
+class ElfOatFile FINAL : public OatFileBase {
+ public:
+  ElfOatFile(const std::string& filename, bool executable) : OatFileBase(filename, executable) {}
+
+  static ElfOatFile* OpenElfFile(File* file,
+                                 const std::string& location,
+                                 uint8_t* requested_base,
+                                 uint8_t* oat_file_begin,  // Override base if not null
+                                 bool writable,
+                                 bool executable,
+                                 const char* abs_dex_location,
+                                 std::string* error_msg);
+
+  bool InitializeFromElfFile(ElfFile* elf_file,
+                             const char* abs_dex_location,
+                             std::string* error_msg);
+
+ protected:
+  const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name,
+                                          std::string* error_msg) const OVERRIDE {
+    const uint8_t* ptr = elf_file_->FindDynamicSymbolAddress(symbol_name);
+    if (ptr == nullptr) {
+      *error_msg = "(Internal implementation could not find symbol)";
+    }
+    return ptr;
+  }
+
+  bool Load(const std::string& elf_filename,
+            uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+            bool writable,
+            bool executable,
+            std::string* error_msg) OVERRIDE;
+
+  void PreSetup(const std::string& elf_filename ATTRIBUTE_UNUSED) OVERRIDE {
+  }
+
+ private:
+  bool ElfFileOpen(File* file,
+                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                   bool writable,
+                   bool executable,
+                   std::string* error_msg);
+
+ private:
+  // Backing memory map for oat file during cross compilation.
+  std::unique_ptr<ElfFile> elf_file_;
+
+  DISALLOW_COPY_AND_ASSIGN(ElfOatFile);
+};
+
+ElfOatFile* ElfOatFile::OpenElfFile(File* file,
+                                    const std::string& location,
+                                    uint8_t* requested_base,
+                                    uint8_t* oat_file_begin,  // Override base if not null
+                                    bool writable,
+                                    bool executable,
+                                    const char* abs_dex_location,
+                                    std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, executable));
+  bool success = oat_file->ElfFileOpen(file, oat_file_begin, writable, executable, error_msg);
+  if (!success) {
+    CHECK(!error_msg->empty());
+    return nullptr;
+  }
+
+  // Complete the setup.
+  if (!oat_file->ComputeFields(requested_base, file->GetPath(), error_msg)) {
+    return nullptr;
+  }
+
+  if (!oat_file->Setup(abs_dex_location, error_msg)) {
+    return nullptr;
+  }
+
+  return oat_file.release();
+}
+
+bool ElfOatFile::InitializeFromElfFile(ElfFile* elf_file,
+                                       const char* abs_dex_location,
+                                       std::string* error_msg) {
+  if (IsExecutable()) {
+    *error_msg = "Cannot initialize from elf file in executable mode.";
+    return false;
+  }
+  elf_file_.reset(elf_file);
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  SetBegin(elf_file->Begin() + offset);
+  SetEnd(elf_file->Begin() + size + offset);
+  // Ignore the optional .bss section when opening non-executable.
+  return Setup(abs_dex_location, error_msg);
+}
+
+bool ElfOatFile::Load(const std::string& elf_filename,
+                      uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
+                      bool writable,
+                      bool executable,
+                      std::string* error_msg) {
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  if (file == nullptr) {
+    *error_msg = StringPrintf("Failed to open oat filename for reading: %s", strerror(errno));
+    return false;
+  }
+  return ElfOatFile::ElfFileOpen(file.get(),
+                                 oat_file_begin,
+                                 writable,
+                                 executable,
+                                 error_msg);
+}
+
+bool ElfOatFile::ElfFileOpen(File* file,
+                             uint8_t* oat_file_begin,
+                             bool writable,
+                             bool executable,
+                             std::string* error_msg) {
+  // TODO: rename requested_base to oat_data_begin
+  elf_file_.reset(ElfFile::Open(file,
+                                writable,
+                                /*program_header_only*/true,
+                                error_msg,
+                                oat_file_begin));
+  if (elf_file_ == nullptr) {
+    DCHECK(!error_msg->empty());
+    return false;
+  }
+  bool loaded = elf_file_->Load(executable, error_msg);
+  DCHECK(loaded || !error_msg->empty());
+  return loaded;
+}
+
+//////////////////////////
+// General OatFile code //
+//////////////////////////
+
+std::string OatFile::ResolveRelativeEncodedDexLocation(
+      const char* abs_dex_location, const std::string& rel_dex_location) {
+  if (abs_dex_location != nullptr && rel_dex_location[0] != '/') {
+    // Strip :classes<N>.dex used for secondary multidex files.
+    std::string base = DexFile::GetBaseLocation(rel_dex_location);
+    std::string multidex_suffix = DexFile::GetMultiDexSuffix(rel_dex_location);
+
+    // Check if the base is a suffix of the provided abs_dex_location.
+    std::string target_suffix = "/" + base;
+    std::string abs_location(abs_dex_location);
+    if (abs_location.size() > target_suffix.size()) {
+      size_t pos = abs_location.size() - target_suffix.size();
+      if (abs_location.compare(pos, std::string::npos, target_suffix) == 0) {
+        return abs_location + multidex_suffix;
+      }
+    }
+  }
+  return rel_dex_location;
+}
+
+static void CheckLocation(const std::string& location) {
+  CHECK(!location.empty());
+}
+
+OatFile* OatFile::OpenWithElfFile(ElfFile* elf_file,
+                                  const std::string& location,
+                                  const char* abs_dex_location,
+                                  std::string* error_msg) {
+  std::unique_ptr<ElfOatFile> oat_file(new ElfOatFile(location, false /* executable */));
+  return oat_file->InitializeFromElfFile(elf_file, abs_dex_location, error_msg)
+      ? oat_file.release()
+      : nullptr;
+}
+
+OatFile* OatFile::Open(const std::string& filename,
+                       const std::string& location,
+                       uint8_t* requested_base,
+                       uint8_t* oat_file_begin,
+                       bool executable,
+                       const char* abs_dex_location,
+                       std::string* error_msg) {
+  CHECK(!filename.empty()) << location;
+  CheckLocation(location);
+  std::unique_ptr<OatFile> ret;
+
+  // Try dlopen first, as it is required for native debuggability. This will fail fast if dlopen is
+  // disabled.
+  OatFile* with_dlopen = OatFileBase::OpenOatFile<DlOpenOatFile>(filename,
+                                                                 location,
+                                                                 requested_base,
+                                                                 oat_file_begin,
+                                                                 false,
+                                                                 executable,
+                                                                 abs_dex_location,
+                                                                 error_msg);
+  if (with_dlopen != nullptr) {
+    return with_dlopen;
+  }
+  if (kPrintDlOpenErrorMessage) {
+    LOG(ERROR) << "Failed to dlopen: " << *error_msg;
+  }
+
+  // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
+  //
+  // On target, dlopen may fail when compiling due to selinux restrictions on installd.
+  //
+  // We use our own ELF loader for Quick to deal with legacy apps that
+  // open a generated dex file by name, remove the file, then open
+  // another generated dex file with the same name. http://b/10614658
+  //
+  // On host, dlopen is expected to fail when cross compiling, so fall back to OpenElfFile.
+  //
+  //
+  // Another independent reason is the absolute placement of boot.oat. dlopen on the host usually
+  // does honor the virtual address encoded in the ELF file only for ET_EXEC files, not ET_DYN.
+  OatFile* with_internal = OatFileBase::OpenOatFile<ElfOatFile>(filename,
+                                                                location,
+                                                                requested_base,
+                                                                oat_file_begin,
+                                                                false,
+                                                                executable,
+                                                                abs_dex_location,
+                                                                error_msg);
+  return with_internal;
+}
+
+OatFile* OatFile::OpenWritable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 true,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile* OatFile::OpenReadable(File* file,
+                               const std::string& location,
+                               const char* abs_dex_location,
+                               std::string* error_msg) {
+  CheckLocation(location);
+  return ElfOatFile::OpenElfFile(file,
+                                 location,
+                                 nullptr,
+                                 nullptr,
+                                 false,
+                                 false,
+                                 abs_dex_location,
+                                 error_msg);
+}
+
+OatFile::OatFile(const std::string& location, bool is_executable)
+    : location_(location),
+      begin_(nullptr),
+      end_(nullptr),
+      bss_begin_(nullptr),
+      bss_end_(nullptr),
+      is_executable_(is_executable),
+      secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
+  CHECK(!location_.empty());
+}
+
+OatFile::~OatFile() {
+  STLDeleteElements(&oat_dex_files_storage_);
+}
+
 const OatHeader& OatFile::GetOatHeader() const {
   return *reinterpret_cast<const OatHeader*>(Begin());
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 0a77654..dbd7541 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -40,7 +40,7 @@
 class OatHeader;
 class OatDexFile;
 
-class OatFile FINAL {
+class OatFile {
  public:
   typedef art::OatDexFile OatDexFile;
 
@@ -74,7 +74,7 @@
                                const char* abs_dex_location,
                                std::string* error_msg);
 
-  ~OatFile();
+  virtual ~OatFile();
 
   bool IsExecutable() const {
     return is_executable_;
@@ -85,12 +85,6 @@
   // Indicates whether the oat file was compiled with full debugging capability.
   bool IsDebuggable() const;
 
-  ElfFile* GetElfFile() const {
-    CHECK_NE(reinterpret_cast<uintptr_t>(elf_file_.get()), reinterpret_cast<uintptr_t>(nullptr))
-        << "Cannot get an elf file from " << GetLocation();
-    return elf_file_.get();
-  }
-
   const std::string& GetLocation() const {
     return location_;
   }
@@ -260,35 +254,10 @@
   static bool GetDexLocationsFromDependencies(const char* dex_dependencies,
                                               std::vector<std::string>* locations);
 
+ protected:
+  OatFile(const std::string& filename, bool executable);
+
  private:
-  static void CheckLocation(const std::string& location);
-
-  static OatFile* OpenDlopen(const std::string& elf_filename,
-                             const std::string& location,
-                             uint8_t* requested_base,
-                             const char* abs_dex_location,
-                             std::string* error_msg);
-
-  static OatFile* OpenElfFile(File* file,
-                              const std::string& location,
-                              uint8_t* requested_base,
-                              uint8_t* oat_file_begin,  // Override base if not null
-                              bool writable,
-                              bool executable,
-                              const char* abs_dex_location,
-                              std::string* error_msg);
-
-  explicit OatFile(const std::string& filename, bool executable);
-  bool Dlopen(const std::string& elf_filename, uint8_t* requested_base,
-              const char* abs_dex_location, std::string* error_msg);
-  bool ElfFileOpen(File* file, uint8_t* requested_base,
-                   uint8_t* oat_file_begin,  // Override where the file is loaded to if not null
-                   bool writable, bool executable,
-                   const char* abs_dex_location,
-                   std::string* error_msg);
-
-  bool Setup(const char* abs_dex_location, std::string* error_msg);
-
   // The oat file name.
   //
   // The image will embed this to link its associated oat file.
@@ -309,18 +278,6 @@
   // Was this oat_file loaded executable?
   const bool is_executable_;
 
-  // Backing memory map for oat file during when opened by ElfWriter during initial compilation.
-  std::unique_ptr<MemMap> mem_map_;
-
-  // Backing memory map for oat file during cross compilation.
-  std::unique_ptr<ElfFile> elf_file_;
-
-  // dlopen handle during runtime.
-  void* dlopen_handle_;
-
-  // Dummy memory map objects corresponding to the regions mapped by dlopen.
-  std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_;
-
   // Owning storage for the OatDexFile objects.
   std::vector<const OatDexFile*> oat_dex_files_storage_;
 
@@ -356,6 +313,7 @@
   friend class OatClass;
   friend class art::OatDexFile;
   friend class OatDumper;  // For GetBase and GetLimit
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatFile);
 };
 
@@ -426,6 +384,7 @@
   uint8_t* const dex_cache_arrays_;
 
   friend class OatFile;
+  friend class OatFileBase;
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index dfd783b..5b10610 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -41,15 +41,13 @@
                                                     // Runtime::Abort
 }
 
-ParsedOptions* ParsedOptions::Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                                     RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::Parse(const RuntimeOptions& options,
+                          bool ignore_unrecognized,
+                          RuntimeArgumentMap* runtime_options) {
   CHECK(runtime_options != nullptr);
 
-  std::unique_ptr<ParsedOptions> parsed(new ParsedOptions());
-  if (parsed->Parse(options, ignore_unrecognized, runtime_options)) {
-    return parsed.release();
-  }
-  return nullptr;
+  ParsedOptions parser;
+  return parser.DoParse(options, ignore_unrecognized, runtime_options);
 }
 
 using RuntimeParser = CmdlineParser<RuntimeArgumentMap, RuntimeArgumentMap::Key>;
@@ -164,6 +162,9 @@
       .Define("-Xjitwarmupthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::JITWarmupThreshold)
+      .Define("-Xjitsaveprofilinginfo")
+          .WithValue(true)
+          .IntoKey(M::JITSaveProfilingInfo)
       .Define("-XX:HspaceCompactForOOMMinIntervalMs=_")  // in ms
           .WithType<MillisecondsToNanoseconds>()  // store as ns
           .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs)
@@ -404,8 +405,9 @@
   //  gLogVerbosity.verifier = true;  // TODO: don't check this in!
 }
 
-bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized,
-                          RuntimeArgumentMap* runtime_options) {
+bool ParsedOptions::DoParse(const RuntimeOptions& options,
+                            bool ignore_unrecognized,
+                            RuntimeArgumentMap* runtime_options) {
   for (size_t i = 0; i < options.size(); ++i) {
     if (true && options[0].first == "-Xzygote") {
       LOG(INFO) << "option[" << i << "]=" << options[i].first;
@@ -556,7 +558,9 @@
     args.Set(M::Image, image);
   }
 
-  if (args.GetOrDefault(M::HeapGrowthLimit) == 0u) {  // 0 means no growth limit
+  // 0 means no growth limit, and growth limit should be always <= heap size
+  if (args.GetOrDefault(M::HeapGrowthLimit) <= 0u ||
+      args.GetOrDefault(M::HeapGrowthLimit) > args.GetOrDefault(M::MemoryMaximumSize)) {
     args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize));
   }
 
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 529dd5c..5974fb6 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -50,8 +50,9 @@
   static std::unique_ptr<RuntimeParser> MakeParser(bool ignore_unrecognized);
 
   // returns true if parsing succeeds, and stores the resulting options into runtime_options
-  static ParsedOptions* Create(const RuntimeOptions& options, bool ignore_unrecognized,
-                               RuntimeArgumentMap* runtime_options);
+  static bool Parse(const RuntimeOptions& options,
+                    bool ignore_unrecognized,
+                    RuntimeArgumentMap* runtime_options);
 
   bool (*hook_is_sensitive_thread_)();
   jint (*hook_vfprintf_)(FILE* stream, const char* format, va_list ap);
@@ -72,8 +73,9 @@
   void Exit(int status);
   void Abort();
 
-  bool Parse(const RuntimeOptions& options,  bool ignore_unrecognized,
-             RuntimeArgumentMap* runtime_options);
+  bool DoParse(const RuntimeOptions& options,
+               bool ignore_unrecognized,
+               RuntimeArgumentMap* runtime_options);
 };
 
 }  // namespace art
diff --git a/runtime/parsed_options_test.cc b/runtime/parsed_options_test.cc
index a8575de..fad00c7 100644
--- a/runtime/parsed_options_test.cc
+++ b/runtime/parsed_options_test.cc
@@ -60,8 +60,8 @@
   options.push_back(std::make_pair("exit", test_exit));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
@@ -102,8 +102,8 @@
   options.push_back(std::make_pair("-Xgc:MC", nullptr));
 
   RuntimeArgumentMap map;
-  std::unique_ptr<ParsedOptions> parsed(ParsedOptions::Create(options, false, &map));
-  ASSERT_TRUE(parsed.get() != nullptr);
+  bool parsed = ParsedOptions::Parse(options, false, &map);
+  ASSERT_TRUE(parsed);
   ASSERT_NE(0u, map.Size());
 
   using Opt = RuntimeArgumentMap;
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 1552318..9cb37ee 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -283,7 +283,12 @@
         prev_shadow_frame_(nullptr),
         stacked_shadow_frame_pushed_(false),
         single_frame_deopt_(single_frame),
-        single_frame_done_(false) {
+        single_frame_done_(false),
+        single_frame_deopt_method_(nullptr) {
+  }
+
+  ArtMethod* GetSingleFrameDeoptMethod() const {
+    return single_frame_deopt_method_;
   }
 
   bool VisitFrame() OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
@@ -356,6 +361,7 @@
         // Single-frame deopt ends at the first non-inlined frame and needs to store that method.
         exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(method));
         single_frame_done_ = true;
+        single_frame_deopt_method_ = method;
       }
       return true;
     }
@@ -586,6 +592,7 @@
   bool stacked_shadow_frame_pushed_;
   const bool single_frame_deopt_;
   bool single_frame_done_;
+  ArtMethod* single_frame_deopt_method_;
 
   DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor);
 };
@@ -614,6 +621,14 @@
   DeoptimizeStackVisitor visitor(self_, context_, this, true);
   visitor.WalkStack(true);
 
+  // Compiled code made an explicit deoptimization. Transfer the code
+  // to interpreter and clear the counter to JIT the method again.
+  ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod();
+  DCHECK(deopt_method != nullptr);
+  deopt_method->ClearCounter();
+  Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+      deopt_method, GetQuickToInterpreterBridge());
+
   // PC needs to be of the quick-to-interpreter bridge.
   int32_t offset;
   #ifdef __LP64__
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a210aa8..dedc110 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -218,6 +218,9 @@
   if (is_native_bridge_loaded_) {
     UnloadNativeBridge();
   }
+
+  MaybeSaveJitProfilingInfo();
+
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -431,14 +434,25 @@
   GetLambdaBoxTable()->SweepWeakBoxedLambdas(visitor);
 }
 
-bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) {
+bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options) {
+  InitLogging(/* argv */ nullptr);  // Calls Locks::Init() as a side effect.
+  bool parsed = ParsedOptions::Parse(raw_options, ignore_unrecognized, runtime_options);
+  if (!parsed) {
+    LOG(ERROR) << "Failed to parse options";
+    return false;
+  }
+  return true;
+}
+
+bool Runtime::Create(RuntimeArgumentMap&& runtime_options) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != nullptr) {
     return false;
   }
-  InitLogging(nullptr);  // Calls Locks::Init() as a side effect.
   instance_ = new Runtime;
-  if (!instance_->Init(options, ignore_unrecognized)) {
+  if (!instance_->Init(std::move(runtime_options))) {
     // TODO: Currently deleting the instance will abort the runtime on destruction. Now This will
     // leak memory, instead. Fix the destructor. b/19100793.
     // delete instance_;
@@ -448,6 +462,12 @@
   return true;
 }
 
+bool Runtime::Create(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
+  RuntimeArgumentMap runtime_options;
+  return ParseOptions(raw_options, ignore_unrecognized, &runtime_options) &&
+      Create(std::move(runtime_options));
+}
+
 static jobject CreateSystemClassLoader(Runtime* runtime) {
   if (runtime->IsAotCompiler() && !runtime->GetCompilerCallbacks()->IsBootImage()) {
     return nullptr;
@@ -601,7 +621,6 @@
       LOG(INFO) << "Failed to access the profile file. Profiler disabled.";
       return true;
     }
-    StartProfiler(profile_output_filename_.c_str());
   }
 
   if (trace_config_.get() != nullptr && trace_config_->trace_file != "") {
@@ -773,7 +792,7 @@
   std::unique_ptr<const OatFile> oat_file(
       OatFile::OpenWithElfFile(elf_file.release(), oat_location, nullptr, &error_msg));
   if (oat_file == nullptr) {
-    LOG(INFO) << "Unable to use '" << oat_filename << "' because " << error_msg;
+    LOG(WARNING) << "Unable to use '" << oat_filename << "' because " << error_msg;
     return false;
   }
 
@@ -827,21 +846,14 @@
   sentinel_ = GcRoot<mirror::Object>(sentinel);
 }
 
-bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) {
+bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
+  RuntimeArgumentMap runtime_options(std::move(runtime_options_in));
   ATRACE_BEGIN("Runtime::Init");
   CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize);
 
   MemMap::Init();
 
   using Opt = RuntimeArgumentMap;
-  RuntimeArgumentMap runtime_options;
-  std::unique_ptr<ParsedOptions> parsed_options(
-      ParsedOptions::Create(raw_options, ignore_unrecognized, &runtime_options));
-  if (parsed_options.get() == nullptr) {
-    LOG(ERROR) << "Failed to parse options";
-    ATRACE_END();
-    return false;
-  }
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
   QuasiAtomic::Startup();
@@ -1618,10 +1630,8 @@
   callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
 }
 
-void Runtime::StartProfiler(const char* profile_output_filename) {
+void Runtime::SetJitProfilingFilename(const char* profile_output_filename) {
   profile_output_filename_ = profile_output_filename;
-  profiler_started_ =
-      BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_);
 }
 
 // Transaction support.
@@ -1767,8 +1777,16 @@
   argv->push_back(feature_string);
 }
 
+void Runtime::MaybeSaveJitProfilingInfo() {
+  if (jit_.get() != nullptr && !profile_output_filename_.empty()) {
+    jit_->SaveProfilingInfo(profile_output_filename_);
+  }
+}
+
 void Runtime::UpdateProfilerState(int state) {
-  VLOG(profiler) << "Profiler state updated to " << state;
+  if (state == kProfileBackground) {
+    MaybeSaveJitProfilingInfo();
+  }
 }
 
 void Runtime::CreateJit() {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d61663c..93d8fcf 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -84,6 +84,7 @@
 class MonitorPool;
 class NullPointerHandler;
 class OatFileManager;
+struct RuntimeArgumentMap;
 class SignalCatcher;
 class StackOverflowHandler;
 class SuspensionHandler;
@@ -112,8 +113,17 @@
 
 class Runtime {
  public:
+  // Parse raw runtime options.
+  static bool ParseOptions(const RuntimeOptions& raw_options,
+                           bool ignore_unrecognized,
+                           RuntimeArgumentMap* runtime_options);
+
   // Creates and initializes a new runtime.
-  static bool Create(const RuntimeOptions& options, bool ignore_unrecognized)
+  static bool Create(RuntimeArgumentMap&& runtime_options)
+      SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
+
+  // Creates and initializes a new runtime.
+  static bool Create(const RuntimeOptions& raw_options, bool ignore_unrecognized)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
 
   // IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
@@ -457,7 +467,7 @@
     return &instrumentation_;
   }
 
-  void StartProfiler(const char* profile_output_filename);
+  void SetJitProfilingFilename(const char* profile_output_filename);
   void UpdateProfilerState(int state);
 
   // Transaction support.
@@ -599,7 +609,7 @@
 
   void BlockSignals();
 
-  bool Init(const RuntimeOptions& options, bool ignore_unrecognized)
+  bool Init(RuntimeArgumentMap&& runtime_options)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_);
   void InitNativeMethods() REQUIRES(!Locks::mutator_lock_);
   void InitThreadGroups(Thread* self);
@@ -608,12 +618,14 @@
   void StartDaemonThreads();
   void StartSignalCatcher();
 
+  void MaybeSaveJitProfilingInfo();
+
   // A pointer to the active runtime or null.
   static Runtime* instance_;
 
   // NOTE: these must match the gc::ProcessState values as they come directly from the framework.
   static constexpr int kProfileForground = 0;
-  static constexpr int kProfileBackgrouud = 1;
+  static constexpr int kProfileBackground = 1;
 
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
   uint64_t callee_save_methods_[kLastCalleeSaveType];
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 9051eda..5624285 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -71,6 +71,7 @@
 RUNTIME_OPTIONS_KEY (unsigned int,        JITWarmupThreshold,             jit::Jit::kDefaultWarmupThreshold)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheInitialCapacity,    jit::JitCodeCache::kInitialCapacity)
 RUNTIME_OPTIONS_KEY (MemoryKiB,           JITCodeCacheMaxCapacity,        jit::JitCodeCache::kMaxCapacity)
+RUNTIME_OPTIONS_KEY (bool,                JITSaveProfilingInfo,           false)
 RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \
                                           HSpaceCompactForOOMMinIntervalsMs,\
                                                                           MsToNs(100 * 1000))  // 100s
diff --git a/runtime/runtime_options.h b/runtime/runtime_options.h
index 88ac00a..4610f6f 100644
--- a/runtime/runtime_options.h
+++ b/runtime/runtime_options.h
@@ -17,14 +17,13 @@
 #ifndef ART_RUNTIME_RUNTIME_OPTIONS_H_
 #define ART_RUNTIME_RUNTIME_OPTIONS_H_
 
-#include "runtime/base/variant_map.h"
-#include "cmdline/cmdline_types.h"  // TODO: don't need to include this file here
+#include "base/variant_map.h"
+#include "cmdline_types.h"  // TODO: don't need to include this file here
 
 // Map keys
 #include <vector>
 #include <string>
-#include "runtime/base/logging.h"
-#include "cmdline/unit.h"
+#include "base/logging.h"
 #include "jdwp/jdwp.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 63e6326..90539b4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -164,14 +164,20 @@
 
 class DeoptimizationContextRecord {
  public:
-  DeoptimizationContextRecord(const JValue& ret_val, bool is_reference,
+  DeoptimizationContextRecord(const JValue& ret_val,
+                              bool is_reference,
+                              bool from_code,
                               mirror::Throwable* pending_exception,
                               DeoptimizationContextRecord* link)
-      : ret_val_(ret_val), is_reference_(is_reference), pending_exception_(pending_exception),
+      : ret_val_(ret_val),
+        is_reference_(is_reference),
+        from_code_(from_code),
+        pending_exception_(pending_exception),
         link_(link) {}
 
   JValue GetReturnValue() const { return ret_val_; }
   bool IsReference() const { return is_reference_; }
+  bool GetFromCode() const { return from_code_; }
   mirror::Throwable* GetPendingException() const { return pending_exception_; }
   DeoptimizationContextRecord* GetLink() const { return link_; }
   mirror::Object** GetReturnValueAsGCRoot() {
@@ -189,6 +195,9 @@
   // Indicates whether the returned value is a reference. If so, the GC will visit it.
   const bool is_reference_;
 
+  // Whether the context was created from an explicit deoptimization in the code.
+  const bool from_code_;
+
   // The exception that was pending before deoptimization (or null if there was no pending
   // exception).
   mirror::Throwable* pending_exception_;
@@ -220,22 +229,28 @@
   DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord);
 };
 
-void Thread::PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+void Thread::PushDeoptimizationContext(const JValue& return_value,
+                                       bool is_reference,
+                                       bool from_code,
                                        mirror::Throwable* exception) {
   DeoptimizationContextRecord* record = new DeoptimizationContextRecord(
       return_value,
       is_reference,
+      from_code,
       exception,
       tlsPtr_.deoptimization_context_stack);
   tlsPtr_.deoptimization_context_stack = record;
 }
 
-void Thread::PopDeoptimizationContext(JValue* result, mirror::Throwable** exception) {
+void Thread::PopDeoptimizationContext(JValue* result,
+                                      mirror::Throwable** exception,
+                                      bool* from_code) {
   AssertHasDeoptimizationContext();
   DeoptimizationContextRecord* record = tlsPtr_.deoptimization_context_stack;
   tlsPtr_.deoptimization_context_stack = record->GetLink();
   result->SetJ(record->GetReturnValue().GetJ());
   *exception = record->GetPendingException();
+  *from_code = record->GetFromCode();
   delete record;
 }
 
@@ -2546,7 +2561,8 @@
     if (is_deoptimization) {
       // Save the exception into the deoptimization context so it can be restored
       // before entering the interpreter.
-      PushDeoptimizationContext(JValue(), false, exception);
+      PushDeoptimizationContext(
+          JValue(), /*is_reference */ false, /* from_code */ false, exception);
     }
   }
   // Don't leave exception visible while we try to find the handler, which may cause class
diff --git a/runtime/thread.h b/runtime/thread.h
index 4624f27..c556c36 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -849,10 +849,14 @@
   // and execute Java code, so there might be nested deoptimizations happening.
   // We need to save the ongoing deoptimization shadow frames and return
   // values on stacks.
-  void PushDeoptimizationContext(const JValue& return_value, bool is_reference,
+  // 'from_code' denotes whether the deoptimization was explicitly made from
+  // compiled code.
+  void PushDeoptimizationContext(const JValue& return_value,
+                                 bool is_reference,
+                                 bool from_code,
                                  mirror::Throwable* exception)
       SHARED_REQUIRES(Locks::mutator_lock_);
-  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception)
+  void PopDeoptimizationContext(JValue* result, mirror::Throwable** exception, bool* from_code)
       SHARED_REQUIRES(Locks::mutator_lock_);
   void AssertHasDeoptimizationContext()
       SHARED_REQUIRES(Locks::mutator_lock_);
@@ -1224,7 +1228,7 @@
     RuntimeStats stats;
   } tls64_;
 
-  struct PACKED(4) tls_ptr_sized_values {
+  struct PACKED(sizeof(void*)) tls_ptr_sized_values {
       tls_ptr_sized_values() : card_table(nullptr), exception(nullptr), stack_end(nullptr),
       managed_stack(), suspend_trigger(nullptr), jni_env(nullptr), tmp_jni_env(nullptr),
       self(nullptr), opeer(nullptr), jpeer(nullptr), stack_begin(nullptr), stack_size(0),
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 10600e2..a2d6363 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -23,28 +23,50 @@
 
 namespace art {
 
+// This is used only from debugger and test code.
 size_t CountModifiedUtf8Chars(const char* utf8) {
+  return CountModifiedUtf8Chars(utf8, strlen(utf8));
+}
+
+/*
+ * This does not validate UTF8 rules (nor did older code). But it gets the right answer
+ * for valid UTF-8 and that's fine because it's used only to size a buffer for later
+ * conversion.
+ *
+ * Modified UTF-8 consists of a series of bytes up to 21 bit Unicode code points as follows:
+ * U+0001  - U+007F   0xxxxxxx
+ * U+0080  - U+07FF   110xxxxx 10xxxxxx
+ * U+0800  - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
+ * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *
+ * U+0000 is encoded using the 2nd form to avoid nulls inside strings (this differs from
+ * standard UTF-8).
+ * The four byte encoding converts to two utf16 characters.
+ */
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
   size_t len = 0;
-  int ic;
-  while ((ic = *utf8++) != '\0') {
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
     len++;
-    if ((ic & 0x80) == 0) {
-      // one-byte encoding
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
       continue;
     }
-    // two- or three-byte encoding
+    // Two- or three-byte encoding.
     utf8++;
     if ((ic & 0x20) == 0) {
-      // two-byte encoding
+      // Two-byte encoding.
       continue;
     }
     utf8++;
     if ((ic & 0x10) == 0) {
-      // three-byte encoding
+      // Three-byte encoding.
       continue;
     }
 
-    // four-byte encoding: needs to be converted into a surrogate
+    // Four-byte encoding: needs to be converted into a surrogate
     // pair.
     utf8++;
     len++;
@@ -52,6 +74,7 @@
   return len;
 }
 
+// This is used only from debugger and test code.
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
   while (*utf8_data_in != '\0') {
     const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in);
@@ -65,13 +88,53 @@
   }
 }
 
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) {
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars,
+                                const char* utf8_data_in, size_t in_bytes) {
+  const char *in_start = utf8_data_in;
+  const char *in_end = utf8_data_in + in_bytes;
+  uint16_t *out_p = utf16_data_out;
+
+  if (LIKELY(out_chars == in_bytes)) {
+    // Common case where all characters are ASCII.
+    for (const char *p = in_start; p < in_end;) {
+      // Safe even if char is signed because ASCII characters always have
+      // the high bit cleared.
+      *out_p++ = dchecked_integral_cast<uint16_t>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  for (const char *p = in_start; p < in_end;) {
+    const uint32_t ch = GetUtf16FromUtf8(&p);
+    const uint16_t leading = GetLeadingUtf16Char(ch);
+    const uint16_t trailing = GetTrailingUtf16Char(ch);
+
+    *out_p++ = leading;
+    if (trailing != 0) {
+      *out_p++ = trailing;
+    }
+  }
+}
+
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t *utf16_end = utf16_in + char_count;
+    for (const uint16_t *p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = dchecked_integral_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
   while (char_count--) {
     const uint16_t ch = *utf16_in++;
     if (ch > 0 && ch <= 0x7f) {
       *utf8_out++ = ch;
     } else {
-      // char_count == 0 here implies we've encountered an unpaired
+      // Char_count == 0 here implies we've encountered an unpaired
       // surrogate and we have no choice but to encode it as 3-byte UTF
       // sequence. Note that unpaired surrogates can occur as a part of
       // "normal" operation.
@@ -115,8 +178,8 @@
   return static_cast<int32_t>(hash);
 }
 
-size_t ComputeModifiedUtf8Hash(const char* chars) {
-  size_t hash = 0;
+uint32_t ComputeModifiedUtf8Hash(const char* chars) {
+  uint32_t hash = 0;
   while (*chars != '\0') {
     hash = hash * 31 + *chars++;
   }
@@ -161,34 +224,31 @@
 
 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
   size_t result = 0;
-  while (char_count--) {
+  const uint16_t *end = chars + char_count;
+  while (chars < end) {
     const uint16_t ch = *chars++;
-    if (ch > 0 && ch <= 0x7f) {
-      ++result;
-    } else if (ch >= 0xd800 && ch <= 0xdbff) {
-      if (char_count > 0) {
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
         const uint16_t ch2 = *chars;
         // If we find a properly paired surrogate, we emit it as a 4 byte
         // UTF sequence. If we find an unpaired leading or trailing surrogate,
         // we emit it as a 3 byte sequence like would have done earlier.
-        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
           chars++;
-          char_count--;
-
           result += 4;
-        } else {
-          result += 3;
+          continue;
         }
-      } else {
-        // This implies we found an unpaired trailing surrogate at the end
-        // of a string.
-        result += 3;
       }
-    } else if (ch > 0x7ff) {
-      result += 3;
-    } else {
-      result += 2;
     }
+    result += 3;
   }
   return result;
 }
diff --git a/runtime/utf.h b/runtime/utf.h
index 1193d29..4abd605 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -40,6 +40,7 @@
  * Returns the number of UTF-16 characters in the given modified UTF-8 string.
  */
 size_t CountModifiedUtf8Chars(const char* utf8);
+size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count);
 
 /*
  * Returns the number of modified UTF-8 bytes needed to represent the given
@@ -51,6 +52,8 @@
  * Convert from Modified UTF-8 to UTF-16.
  */
 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in);
+void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars,
+                                const char* utf8_in, size_t in_bytes);
 
 /*
  * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner
@@ -71,7 +74,8 @@
  * this anyway, so if you want a NUL-terminated string, you know where to
  * put the NUL byte.
  */
-void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count);
+void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count,
+                                const uint16_t* utf16_in, size_t char_count);
 
 /*
  * The java.lang.String hashCode() algorithm.
@@ -81,8 +85,8 @@
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
 // Compute a hash code of a modified UTF-8 string. Not the standard java hash since it returns a
-// size_t and hashes individual chars instead of codepoint words.
-size_t ComputeModifiedUtf8Hash(const char* chars);
+// uint32_t and hashes individual chars instead of codepoint words.
+uint32_t ComputeModifiedUtf8Hash(const char* chars);
 
 /*
  * Retrieve the next UTF-16 character or surrogate pair from a UTF-8 string.
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 94a6ea5..5239e40 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -19,6 +19,7 @@
 #include "common_runtime_test.h"
 #include "utf-inl.h"
 
+#include <map>
 #include <vector>
 
 namespace art {
@@ -48,7 +49,7 @@
 };
 
 // A test string that contains a UTF-8 encoding of a surrogate pair
-// (code point = U+10400)
+// (code point = U+10400).
 static const uint8_t kSurrogateEncoding[] = {
     0xed, 0xa0, 0x81,
     0xed, 0xb0, 0x80,
@@ -66,13 +67,13 @@
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(1, ptr, start);
 
-  // Two byte sequence
+  // Two byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(3, ptr, start);
 
-  // Three byte sequence
+  // Three byte sequence.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -84,7 +85,7 @@
   EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair));
   EXPECT_ARRAY_POSITION(10, ptr, start);
 
-  // Null terminator
+  // Null terminator.
   pair = GetUtf16FromUtf8(&ptr);
   EXPECT_EQ(0, GetLeadingUtf16Char(pair));
   EXPECT_EQ(0, GetTrailingUtf16Char(pair));
@@ -117,7 +118,8 @@
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
-  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size());
+  ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(),
+                             &input[0], input.size());
   EXPECT_EQ(expected, output);
 }
 
@@ -139,10 +141,10 @@
   AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f });
 
   AssertConversion({
-      0xd802, 0xdc02,  // Surrogate pair
-      0xdef0, 0xdcff,  // Three byte encodings
-      0x0101, 0x0000,  // Two byte encodings
-      'p'   , 'p'      // One byte encoding
+      0xd802, 0xdc02,  // Surrogate pair.
+      0xdef0, 0xdcff,  // Three byte encodings.
+      0x0101, 0x0000,  // Two byte encodings.
+      'p'   , 'p'      // One byte encoding.
     }, {
       0xf0, 0x90, 0xa0, 0x82,
       0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf,
@@ -155,9 +157,225 @@
   // Unpaired trailing surrogate at the end of input.
   AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 });
   // Unpaired (or incorrectly paired) surrogates in the middle of the input.
-  AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' });
-  AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' });
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes {
+      {{ 'h' }, { 'h' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x81 }, { 0xc2, 0x81 }},
+      {{ 0x801 }, { 0xe0, 0xa0, 0x81 }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes {
+      {{ 'e' }, { 'e' }},
+      {{ 0 }, { 0xc0, 0x80 }},
+      {{ 0x7ff }, { 0xdf, 0xbf }},
+      {{ 0xffff }, { 0xef, 0xbf, 0xbf }},
+  };
+  const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests {
+      {{ 0xd801 }, { 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }},
+      {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }},
+      {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }},
+  };
+  for (const auto& prefix : prefixes) {
+    const std::vector<uint16_t>& prefix_in = prefix.first;
+    const std::vector<uint8_t>& prefix_out = prefix.second;
+    for (const auto& test : tests) {
+      const std::vector<uint16_t>& test_in = test.first;
+      const std::vector<uint8_t>& test_out = test.second;
+      for (const auto& suffix : suffixes) {
+        const std::vector<uint16_t>& suffix_in = suffix.first;
+        const std::vector<uint8_t>& suffix_out = suffix.second;
+        std::vector<uint16_t> in = prefix_in;
+        in.insert(in.end(), test_in.begin(), test_in.end());
+        in.insert(in.end(), suffix_in.begin(), suffix_in.end());
+        std::vector<uint8_t> out = prefix_out;
+        out.insert(out.end(), test_out.begin(), test_out.end());
+        out.insert(out.end(), suffix_out.begin(), suffix_out.end());
+        AssertConversion(in, out);
+      }
+    }
+  }
+}
+
+// Old versions of functions, here to compare answers with optimized versions.
+
+size_t CountModifiedUtf8Chars_reference(const char* utf8) {
+  size_t len = 0;
+  int ic;
+  while ((ic = *utf8++) != '\0') {
+    len++;
+    if ((ic & 0x80) == 0) {
+      // one-byte encoding
+      continue;
+    }
+    // two- or three-byte encoding
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // two-byte encoding
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // three-byte encoding
+      continue;
+    }
+
+    // four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  while (char_count--) {
+    const uint16_t ch = *chars++;
+    if (ch > 0 && ch <= 0x7f) {
+      ++result;
+    } else if (ch >= 0xd800 && ch <= 0xdbff) {
+      if (char_count > 0) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence like would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          chars++;
+          char_count--;
+
+          result += 4;
+        } else {
+          result += 3;
+        }
+      } else {
+        // This implies we found an unpaired trailing surrogate at the end
+        // of a string.
+        result += 3;
+      }
+    } else if (ch > 0x7ff) {
+      result += 3;
+    } else {
+      result += 2;
+    }
+  }
+  return result;
+}
+
+static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in,
+                                                 size_t char_count) {
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // Char_count == 0 here implies we've encountered an unpaired
+      // surrogate and we have no choice but to encode it as 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as a part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again.
+
+static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) {
+  first = (code_point >> 10) + 0xd7c0;
+  second = (code_point & 0x03ff) + 0xdc00;
+}
+
+static void testConversions(uint16_t *buf, int char_count) {
+  char bytes_test[8], bytes_reference[8];
+  uint16_t out_buf_test[4], out_buf_reference[4];
+  int byte_count_test, byte_count_reference;
+  int char_count_test, char_count_reference;
+
+  // Calculate the number of utf-8 bytes for the utf-16 chars.
+  byte_count_reference = CountUtf8Bytes_reference(buf, char_count);
+  byte_count_test = CountUtf8Bytes(buf, char_count);
+  EXPECT_EQ(byte_count_reference, byte_count_test);
+
+  // Convert the utf-16 string to utf-8 bytes.
+  ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count);
+  ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count);
+  for (int i = 0; i < byte_count_test; ++i) {
+    EXPECT_EQ(bytes_reference[i], bytes_test[i]);
+  }
+
+  // Calculate the number of utf-16 chars from the utf-8 bytes.
+  bytes_reference[byte_count_reference] = 0;  // Reference function needs null termination.
+  char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference);
+  char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test);
+  EXPECT_EQ(char_count, char_count_reference);
+  EXPECT_EQ(char_count, char_count_test);
+
+  // Convert the utf-8 bytes back to utf-16 chars.
+  // Does not need copied _reference version of the function because the original
+  // function with the old API is retained for debug/testing code.
+  ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference);
+  ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test);
+  for (int i = 0; i < char_count_test; ++i) {
+    EXPECT_EQ(buf[i], out_buf_reference[i]);
+    EXPECT_EQ(buf[i], out_buf_test[i]);
+  }
+}
+
+TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) {
+  for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) {
+    uint16_t buf[4];
+    if (codePoint <= 0xffff) {
+      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
+        // According to the Unicode standard, no character will ever
+        // be assigned to these code points, and they can not be encoded
+        // into either utf-16 or utf-8.
+        continue;
+      }
+      buf[0] = 'h';
+      buf[1] = codePoint;
+      buf[2] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+    } else {
+      buf[0] = 'h';
+      codePointToSurrogatePair(codePoint, buf[1], buf[2]);
+      buf[3] = 'e';
+      testConversions(buf, 2);
+      testConversions(buf, 3);
+      testConversions(buf, 4);
+      testConversions(buf + 1, 1);
+      testConversions(buf + 1, 2);
+      testConversions(buf + 1, 3);
+    }
+  }
 }
 
 }  // namespace art
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 68db7e3..eddc3a4 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1392,21 +1392,6 @@
   return filename;
 }
 
-bool IsZipMagic(uint32_t magic) {
-  return (('P' == ((magic >> 0) & 0xff)) &&
-          ('K' == ((magic >> 8) & 0xff)));
-}
-
-bool IsDexMagic(uint32_t magic) {
-  return DexFile::IsMagicValid(reinterpret_cast<const uint8_t*>(&magic));
-}
-
-bool IsOatMagic(uint32_t magic) {
-  return (memcmp(reinterpret_cast<const uint8_t*>(magic),
-                 OatHeader::kOatMagic,
-                 sizeof(OatHeader::kOatMagic)) == 0);
-}
-
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg) {
   const std::string command_line(Join(arg_vector, ' '));
 
diff --git a/runtime/utils.h b/runtime/utils.h
index 8b7941a..5b9e963 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -273,11 +273,6 @@
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
 
-// Check whether the given magic matches a known file type.
-bool IsZipMagic(uint32_t magic);
-bool IsDexMagic(uint32_t magic);
-bool IsOatMagic(uint32_t magic);
-
 // Wrapper on fork/execv to run a command in a subprocess.
 bool Exec(std::vector<std::string>& arg_vector, std::string* error_msg);
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 364b8ce..cf27ff2 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -192,7 +192,7 @@
     }
     previous_method_idx = method_idx;
     InvokeType type = it->GetMethodInvokeType(*class_def);
-    ArtMethod* method = linker->ResolveMethod(
+    ArtMethod* method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
         *dex_file, method_idx, dex_cache, class_loader, nullptr, type);
     if (method == nullptr) {
       DCHECK(self->IsExceptionPending());
@@ -3638,6 +3638,30 @@
   const RegType& referrer = GetDeclaringClass();
   auto* cl = Runtime::Current()->GetClassLinker();
   auto pointer_size = cl->GetImagePointerSize();
+
+  // Check that interface methods are static or match interface classes.
+  // We only allow statics if we don't have default methods enabled.
+  if (klass->IsInterface()) {
+    Runtime* runtime = Runtime::Current();
+    const bool default_methods_supported =
+        runtime == nullptr ||
+        runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
+    if (method_type != METHOD_INTERFACE &&
+        (!default_methods_supported || method_type != METHOD_STATIC)) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in an interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  } else {
+    if (method_type == METHOD_INTERFACE) {
+      Fail(VERIFY_ERROR_CLASS_CHANGE)
+          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
+          << " is in a non-interface class " << PrettyClass(klass);
+      return nullptr;
+    }
+  }
+
   ArtMethod* res_method = dex_cache_->GetResolvedMethod(dex_method_idx, pointer_size);
   if (res_method == nullptr) {
     const char* name = dex_file_->GetMethodName(method_id);
@@ -3692,23 +3716,6 @@
                                       << PrettyMethod(res_method);
     return nullptr;
   }
-  // Check that interface methods are static or match interface classes.
-  // We only allow statics if we don't have default methods enabled.
-  Runtime* runtime = Runtime::Current();
-  const bool default_methods_supported =
-      runtime == nullptr ||
-      runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods);
-  if (klass->IsInterface() &&
-      method_type != METHOD_INTERFACE &&
-      (!default_methods_supported || method_type != METHOD_STATIC)) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "non-interface method " << PrettyMethod(res_method)
-                                    << " is in an interface class " << PrettyClass(klass);
-    return nullptr;
-  } else if (!klass->IsInterface() && method_type == METHOD_INTERFACE) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "interface method " << PrettyMethod(res_method)
-                                    << " is in a non-interface class " << PrettyClass(klass);
-    return nullptr;
-  }
   // See if the method type implied by the invoke instruction matches the access flags for the
   // target method.
   if ((method_type == METHOD_DIRECT && (!res_method->IsDirect() || res_method->IsStatic())) ||
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 3f00a1a..057b351 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -21,6 +21,8 @@
 
 # android.test.anno.MissingAnnotation is available at compile time...
 ${JAVAC} -d classes `find src -name '*.java'`
+# overwrite RenamedEnum
+${JAVAC} -d classes `find src2 -name '*.java'`
 
 # ...but not at run time.
 rm 'classes/android/test/anno/MissingAnnotation.class'
diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt
index e1c3dad..3d9fd8b 100644
--- a/test/005-annotations/expected.txt
+++ b/test/005-annotations/expected.txt
@@ -89,7 +89,7 @@
   annotations on FIELD int android.test.anno.FullyNoted.mBar:
     @android.test.anno.AnnoFancyField(nombre=fubar)
       interface android.test.anno.AnnoFancyField
-    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / class $Proxy13
+    aff: @android.test.anno.AnnoFancyField(nombre=fubar) / true
     --> nombre is 'fubar'
 
 SimplyNoted.get(AnnoSimpleType) = @android.test.anno.AnnoSimpleType()
@@ -108,3 +108,4 @@
 
 Get annotation with missing class should not throw
 Got expected TypeNotPresentException
+Got expected NoSuchFieldError
diff --git a/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
new file mode 100644
index 0000000..7a15c64
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface AnnoRenamedEnumMethod {
+    RenamedEnumClass.RenamedEnum renamed() default RenamedEnumClass.RenamedEnum.FOO;
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..cfba819
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOO, BAR };
+}
diff --git a/test/005-annotations/src/android/test/anno/RenamedNoted.java b/test/005-annotations/src/android/test/anno/RenamedNoted.java
new file mode 100644
index 0000000..aae3a3f
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/RenamedNoted.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+public class RenamedNoted {
+    @AnnoRenamedEnumMethod(renamed=RenamedEnumClass.RenamedEnum.BAR)
+    public int bar() {
+        return 0;
+    }
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 7b74a73..bc89f16 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -1,9 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package android.test.anno;
 
 import java.lang.annotation.Annotation;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
 import java.util.TreeMap;
 
 public class TestAnnotations {
@@ -65,7 +82,7 @@
             AnnoFancyField aff;
             aff = (AnnoFancyField) f.getAnnotation(AnnoFancyField.class);
             if (aff != null) {
-                System.out.println("    aff: " + aff + " / " + aff.getClass());
+                System.out.println("    aff: " + aff + " / " + Proxy.isProxyClass(aff.getClass()));
                 System.out.println("    --> nombre is '" + aff.nombre() + "'");
             }
         }
@@ -199,5 +216,15 @@
         } catch (TypeNotPresentException expected) {
             System.out.println("Got expected TypeNotPresentException");
         }
+
+        // Test renamed enums.
+        try {
+            for (Method m: RenamedNoted.class.getDeclaredMethods()) {
+                Annotation[] annos = m.getDeclaredAnnotations();
+                System.out.println("  annotations on METH " + m + ":");
+            }
+        } catch (NoSuchFieldError expected) {
+            System.out.println("Got expected NoSuchFieldError");
+        }
     }
 }
diff --git a/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
new file mode 100644
index 0000000..5a2fe36
--- /dev/null
+++ b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+@Target(ElementType.METHOD)
+@Retention(RetentionPolicy.RUNTIME)
+
+public @interface RenamedEnumClass {
+    enum RenamedEnum { FOOBAR };
+}
diff --git a/test/044-proxy/expected.txt b/test/044-proxy/expected.txt
index 052c8fa..be7023e 100644
--- a/test/044-proxy/expected.txt
+++ b/test/044-proxy/expected.txt
@@ -42,7 +42,7 @@
  (no args)
 --- blob
 Success: method blob res=mix
-$Proxy1.getTrace null:-1
+$PROXY_CLASS_NAME0$.getTrace null:-1
 Invoke public abstract void Shapes.upChuck()
  (no args)
 Got expected ioobe
@@ -51,7 +51,7 @@
 Got expected ie
 
 Proxy interfaces: [interface Quads, interface Colors, interface Trace]
-Proxy methods: [public final java.lang.String $Proxy1.blob(), public final double $Proxy1.blue(int), public final R0a $Proxy1.checkMe(), public final R0aa $Proxy1.checkMe(), public final R0base $Proxy1.checkMe(), public final void $Proxy1.circle(int), public final boolean $Proxy1.equals(java.lang.Object), public final void $Proxy1.getTrace(), public final int $Proxy1.green(double), public final int $Proxy1.hashCode(), public final int $Proxy1.mauve(java.lang.String), public final int $Proxy1.rectangle(int,int), public final int $Proxy1.red(float), public final int $Proxy1.square(int,int), public final java.lang.String $Proxy1.toString(), public final int $Proxy1.trapezoid(int,double,int), public final void $Proxy1.upCheck() throws java.lang.InterruptedException, public final void $Proxy1.upChuck()]
+Proxy methods: [public final java.lang.String $PROXY_CLASS_NAME0$.blob(), public final double $PROXY_CLASS_NAME0$.blue(int), public final R0a $PROXY_CLASS_NAME0$.checkMe(), public final R0aa $PROXY_CLASS_NAME0$.checkMe(), public final R0base $PROXY_CLASS_NAME0$.checkMe(), public final void $PROXY_CLASS_NAME0$.circle(int), public final boolean $PROXY_CLASS_NAME0$.equals(java.lang.Object), public final void $PROXY_CLASS_NAME0$.getTrace(), public final int $PROXY_CLASS_NAME0$.green(double), public final int $PROXY_CLASS_NAME0$.hashCode(), public final int $PROXY_CLASS_NAME0$.mauve(java.lang.String), public final int $PROXY_CLASS_NAME0$.rectangle(int,int), public final int $PROXY_CLASS_NAME0$.red(float), public final int $PROXY_CLASS_NAME0$.square(int,int), public final java.lang.String $PROXY_CLASS_NAME0$.toString(), public final int $PROXY_CLASS_NAME0$.trapezoid(int,double,int), public final void $PROXY_CLASS_NAME0$.upCheck() throws java.lang.InterruptedException, public final void $PROXY_CLASS_NAME0$.upChuck()]
 Decl annos: []
 Param annos (0) : []
 Modifiers: 17
@@ -84,7 +84,7 @@
 Invoke public abstract void InterfaceW1.bothThrowBase() throws BaseException,SubException,SubSubException
  (no args)
 Got expected exception
-Proxy methods: [public final boolean $Proxy3.equals(java.lang.Object), public final java.lang.Object $Proxy3.foo(), public final java.lang.String $Proxy3.foo(), public final int $Proxy3.hashCode(), public final java.lang.String $Proxy3.toString()]
+Proxy methods: [public final boolean $PROXY_CLASS_NAME1$.equals(java.lang.Object), public final java.lang.Object $PROXY_CLASS_NAME1$.foo(), public final java.lang.String $PROXY_CLASS_NAME1$.foo(), public final int $PROXY_CLASS_NAME1$.hashCode(), public final java.lang.String $PROXY_CLASS_NAME1$.toString()]
 Invocation of public abstract java.lang.String NarrowingTest$I2.foo()
 Invoking foo using I2 type: hello
 Invocation of public abstract java.lang.Object NarrowingTest$I1.foo()
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 1573297..445a6cc 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -84,7 +84,8 @@
         });
         System.out.println("Proxy interfaces: " +
             Arrays.deepToString(proxy.getClass().getInterfaces()));
-        System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+        System.out.println("Proxy methods: " +
+            Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
         Method meth = methods[methods.length -1];
         System.out.println("Decl annos: " + Arrays.deepToString(meth.getDeclaredAnnotations()));
         Annotation[][] paramAnnos = meth.getParameterAnnotations();
@@ -100,6 +101,7 @@
         /* create the proxy class */
         Class proxyClass = Proxy.getProxyClass(Shapes.class.getClassLoader(),
                             new Class[] { Quads.class, Colors.class, Trace.class });
+        Main.registerProxyClassName(proxyClass.getCanonicalName());
 
         /* create a proxy object, passing the handler object in */
         Object proxy = null;
@@ -262,7 +264,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
@@ -276,7 +279,8 @@
             for (int i = 0; i < stackTrace.length; i++) {
                 StackTraceElement ste = stackTrace[i];
                 if (ste.getMethodName().equals("getTrace")) {
-                  System.out.println(ste.getClassName() + "." + ste.getMethodName() + " " +
+                  String outputClassName = Main.replaceProxyClassNamesForOutput(ste.getClassName());
+                  System.out.println(outputClassName + "." + ste.getMethodName() + " " +
                                      ste.getFileName() + ":" + ste.getLineNumber());
                 }
             }
diff --git a/test/044-proxy/src/Main.java b/test/044-proxy/src/Main.java
index 05e8e5b..1f23b95 100644
--- a/test/044-proxy/src/Main.java
+++ b/test/044-proxy/src/Main.java
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+import java.util.HashMap;
+
 /**
  * Test java.lang.reflect.Proxy
  */
@@ -30,4 +32,24 @@
         FloatSelect.main(null);
         NativeProxy.main(args);
     }
+
+    // The following code maps from the actual proxy class names (eg $Proxy2) to their test output
+    // names (eg $PROXY_CLASS_NAME1$). This is to avoid the flaky test failures due to potentially
+    // undeterministic proxy class naming.
+
+    public static void registerProxyClassName(String proxyClassName) {
+        proxyClassNameMap.put(proxyClassName,
+                              "$PROXY_CLASS_NAME" + (uniqueTestProxyClassNum++) + "$");
+    }
+
+    public static String replaceProxyClassNamesForOutput(String str) {
+        for (String key : proxyClassNameMap.keySet()) {
+            str = str.replace(key, proxyClassNameMap.get(key));
+        }
+        return str;
+    }
+
+    private static final HashMap<String, String> proxyClassNameMap = new HashMap<String, String>();
+
+    private static int uniqueTestProxyClassNum = 0;
 }
diff --git a/test/044-proxy/src/NarrowingTest.java b/test/044-proxy/src/NarrowingTest.java
index 3b94b76..5b80d72 100644
--- a/test/044-proxy/src/NarrowingTest.java
+++ b/test/044-proxy/src/NarrowingTest.java
@@ -45,9 +45,11 @@
                        }
                    }
                });
+       Main.registerProxyClassName(proxy.getClass().getCanonicalName());
 
        Method[] methods = proxy.getClass().getDeclaredMethods();
-       System.out.println("Proxy methods: " + Arrays.deepToString(methods));
+       System.out.println("Proxy methods: " +
+                          Main.replaceProxyClassNamesForOutput(Arrays.deepToString(methods)));
 
        System.out.println("Invoking foo using I2 type: " + proxy.foo());
 
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index c932761..0b87a4f 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -31,9 +31,9 @@
 30 (class java.lang.Integer)
 62 (class java.lang.Long)
 14 (class java.lang.Short)
-[public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
-[public native char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), native void java.lang.String.getCharsNoCheck(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), native void java.lang.String.setCharAt(int,char), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public native [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private char java.lang.String.foldCase(char), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
+[java.lang.String(int,int,char[]), public java.lang.String(), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder)]
+[private final int java.lang.String.count, private int java.lang.String.hashCode, private static final char java.lang.String.REPLACEMENT_CHAR, private static final char[] java.lang.String.ASCII, private static final long java.lang.String.serialVersionUID, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER]
+[native void java.lang.String.getCharsNoCheck(int,int,char[],int), native void java.lang.String.setCharAt(int,char), private char java.lang.String.foldCase(char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), private native int java.lang.String.fastIndexOf(int,int), private native java.lang.String java.lang.String.fastSubstring(int,int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public boolean java.lang.String.isEmpty(), public boolean java.lang.String.matches(java.lang.String), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public int java.lang.String.compareTo(java.lang.Object), public int java.lang.String.compareToIgnoreCase(java.lang.String), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public int java.lang.String.offsetByCodePoints(int,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public native [C java.lang.String.toCharArray(), public native char java.lang.String.charAt(int), public native int java.lang.String.compareTo(java.lang.String), public native java.lang.String java.lang.String.concat(java.lang.String), public native java.lang.String java.lang.String.intern(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), public static java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(long), public void java.lang.String.getBytes(int,int,byte[],int), public void java.lang.String.getChars(int,int,char[],int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
 0
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index bf3a574..1245852 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -157,10 +157,28 @@
     System.out.println(o + " (" + (o != null ? o.getClass() : "null") + ")");
   }
 
+  /**
+   * Sorts the input array using the comparator and returns the sorted array.
+   */
+  private static Object[] sort(Object[] objects, Comparator<Object> comp) {
+    Arrays.sort(objects, comp);
+    return objects;
+  }
+
   public static void testMethodReflection() throws Exception {
-    System.out.println(Arrays.toString(String.class.getDeclaredConstructors()));
-    System.out.println(Arrays.toString(String.class.getDeclaredFields()));
-    System.out.println(Arrays.toString(String.class.getDeclaredMethods()));
+    Comparator<Object> comp = new Comparator<Object>() {
+      public int compare(Object a, Object b) {
+        return a.toString().compareTo(b.toString());
+      }
+      public boolean equals(Object b) {
+        return this == b;
+      }
+    };
+
+    // Sort the return values by their string values since the order is undefined by the spec.
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredConstructors(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredFields(), comp)));
+    System.out.println(Arrays.toString(sort(String.class.getDeclaredMethods(), comp)));
 
     System.out.println(Arrays.toString(Main.class.getInterfaces()));
     System.out.println(Arrays.toString(String.class.getInterfaces()));
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index ffeae7d..c3d2759 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -652,20 +652,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -685,27 +684,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo2(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i <= end; i++) {
@@ -725,25 +722,25 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-
   /// CHECK-START: void Main.foo3(int[], int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end < array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = 3 ; i <= end; i++) {
@@ -770,18 +767,19 @@
   /// CHECK: ArraySet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for end <= array.length,
+    // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
     for (int i = end ; i > 0; i--) {
@@ -816,14 +814,18 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  //  array.length is defined before the loop header so no phi is needed.
-  /// CHECK-NOT: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
@@ -831,8 +833,8 @@
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
     }
-    // One HDeoptimize will be added.
-    // It's for (end - 2 <= array.length - 2).
+    // Several HDeoptimize will be added. Two for each index.
+    // The null check is not necessary.
     for (int i = end - 2 ; i > 0; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -859,7 +861,6 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-
   /// CHECK-START: void Main.foo6(int[], int, int, boolean) BCE (after)
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
@@ -874,23 +875,27 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
   /// CHECK: Goto
-  /// CHECK-NOT: Deoptimize
+  /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
-    // Three HDeoptimize will be added. One for
-    // start >= 2, one for end <= array.length - 3,
-    // and one for null check on array (to hoist null
-    // check and array.length out of loop).
+    // Several HDeoptimize will be added.
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
@@ -914,20 +919,19 @@
   /// CHECK: ArrayGet
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
-  /// CHECK: Goto
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo7(int[] array, int start, int end, boolean lowEnd) {
-    // Three HDeoptimize will be added. One for
-    // start >= 0, one for end <= array.length,
+    // Three HDeoptimize will be added. One for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
     for (int i = start ; i < end; i++) {
@@ -955,26 +959,28 @@
   /// CHECK: Phi
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArraySet
-  //  Added blocks for deoptimization.
+  //  Added blocks at end for deoptimization.
+  /// CHECK: Exit
   /// CHECK: If
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
-  /// CHECK: Deoptimize
+  /// CHECK: Goto
+  /// CHECK: Goto
+  /// CHECK: If
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Goto
-  /// CHECK: Phi
+  /// CHECK: Goto
   /// CHECK: Goto
 
   void foo8(int[][] matrix, int start, int end) {
-    // Three HDeoptimize will be added for the outer loop.
-    // start >= 0, end <= matrix.length, and null check on matrix.
-    // Three HDeoptimize will be added for the inner loop
-    // start >= 0 (TODO: this may be optimized away),
-    // end <= row.length, and null check on row.
+    // Three HDeoptimize will be added for the outer loop,
+    // two for the index, and null check on matrix. Same
+    // for the inner loop.
     for (int i = start; i < end; i++) {
       int[] row = matrix[i];
       for (int j = start; j < end; j++) {
@@ -994,15 +1000,22 @@
   //  loop for loop body entry test.
   /// CHECK: Deoptimize
   /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
   /// CHECK-NOT: Deoptimize
   /// CHECK: Phi
   /// CHECK-NOT: NullCheck
   /// CHECK-NOT: BoundsCheck
   /// CHECK: ArrayGet
 
+  /// CHECK-START: void Main.foo9(int[], boolean) instruction_simplifier_after_bce (after)
+  //  Simplification removes the redundant check
+  /// CHECK: Deoptimize
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: Deoptimize
+
   void foo9(int[] array, boolean expectInterpreter) {
-    // Two HDeoptimize will be added. One for
-    // 10 <= array.length, and one for null check on array.
+    // Two HDeoptimize will be added. Two for the index
+    // and one for null check on array.
     for (int i = 0 ; i < 10; i++) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index ec63057..f1f80ca 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -618,6 +618,57 @@
     getSuper();
   }
 
+  /// CHECK-START: void Main.testLoopPhiWithNullFirstInput(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<Main:l\d+>>      NewInstance klass:Main exact:true
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<LoopPhi>>,<<Main>>] klass:Main exact:true
+  private void testLoopPhiWithNullFirstInput(boolean cond) {
+    Main a = null;
+    while (a == null) {
+      if (cond) {
+        a = new Main();
+      }
+    }
+  }
+
+  /// CHECK-START: void Main.testLoopPhisWithNullAndCrossUses(boolean) reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<PhiA:l\d+>>      Phi [<<Null>>,<<PhiB:l\d+>>,<<PhiA>>] klass:java.lang.Object exact:false
+  /// CHECK-DAG:  <<PhiB>>           Phi [<<Null>>,<<PhiB>>,<<PhiA>>] klass:java.lang.Object exact:false
+  private void testLoopPhisWithNullAndCrossUses(boolean cond) {
+    Main a = null;
+    Main b = null;
+    while (a == null) {
+      if (cond) {
+        a = b;
+      } else {
+        b = a;
+      }
+    }
+  }
+
+  /// CHECK-START: java.lang.Object[] Main.testInstructionsWithUntypedParent() reference_type_propagation (after)
+  /// CHECK-DAG:  <<Null:l\d+>>      NullConstant
+  /// CHECK-DAG:  <<LoopPhi:l\d+>>   Phi [<<Null>>,<<Phi:l\d+>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Array:l\d+>>     NewArray klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<Phi>>            Phi [<<Array>>,<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:  <<NC:l\d+>>        NullCheck [<<LoopPhi>>] klass:java.lang.Object[] exact:true
+  /// CHECK-DAG:                     ArrayGet [<<NC>>,{{i\d+}}] klass:java.lang.Object exact:false
+  private Object[] testInstructionsWithUntypedParent() {
+    Object[] array = null;
+    boolean cond = true;
+    for (int i = 0; i < 10; ++i) {
+      if (cond) {
+        array = new Object[10];
+        array[0] = new Object();
+        cond = false;
+      } else {
+        array[i] = array[0];
+      }
+    }
+    return array;
+  }
+
   public static void main(String[] args) {
   }
 }
diff --git a/test/464-checker-inline-sharpen-calls/src/Main.java b/test/464-checker-inline-sharpen-calls/src/Main.java
index 6dce96c..5080f142 100644
--- a/test/464-checker-inline-sharpen-calls/src/Main.java
+++ b/test/464-checker-inline-sharpen-calls/src/Main.java
@@ -19,23 +19,25 @@
   public void invokeVirtual() {
   }
 
-  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (before)
-  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) builder (after)
+  /// CHECK-DAG:     <<Invoke:v\d+>>  InvokeVirtual
   /// CHECK-DAG:                      ReturnVoid
 
   /// CHECK-START: void Main.inlineSharpenInvokeVirtual(Main) inliner (after)
+  /// CHECK-NOT:                      InvokeVirtual
   /// CHECK-NOT:                      InvokeStaticOrDirect
 
   public static void inlineSharpenInvokeVirtual(Main m) {
     m.invokeVirtual();
   }
 
-  /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (before)
-  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeStaticOrDirect
+  /// CHECK-START: int Main.inlineSharpenStringInvoke() ssa_builder (after)
+  /// CHECK-DAG:     <<Invoke:i\d+>>  InvokeVirtual
   /// CHECK-DAG:                      Return [<<Invoke>>]
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
+  /// CHECK-NOT:                      InvokeVirtual
 
   /// CHECK-START: int Main.inlineSharpenStringInvoke() inliner (after)
   /// CHECK-DAG:     <<Field:i\d+>>   InstanceFieldGet
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index c1f25b3..87ff3f7 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -25,10 +25,10 @@
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect recursive:true
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect recursive:true
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt
index b0014d7..42b331f 100644
--- a/test/492-checker-inline-invoke-interface/expected.txt
+++ b/test/492-checker-inline-invoke-interface/expected.txt
@@ -2,4 +2,4 @@
 java.lang.Exception
 	at ForceStatic.<clinit>(Main.java:24)
 	at Main.$inline$foo(Main.java:31)
-	at Main.main(Main.java:48)
+	at Main.main(Main.java:50)
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index 9a45485..a8b6307 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -31,15 +31,17 @@
     int a = ForceStatic.field;
   }
 
-  /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
+  /// CHECK-START: void Main.main(java.lang.String[]) ssa_builder (after)
   /// CHECK:           InvokeStaticOrDirect
-  /// CHECK:           InvokeStaticOrDirect
+  /// CHECK:           InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (before)
   /// CHECK-NOT:       ClinitCheck
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK-NOT:       InvokeStaticOrDirect
+  /// CHECK-NOT:       InvokeVirtual
+  /// CHECK-NOT:       InvokeInterface
 
   /// CHECK-START: void Main.main(java.lang.String[]) inliner (after)
   /// CHECK:           ClinitCheck
diff --git a/test/530-checker-loops/src/Main.java b/test/530-checker-loops/src/Main.java
index 58c92f1..3f6e48b 100644
--- a/test/530-checker-loops/src/Main.java
+++ b/test/530-checker-loops/src/Main.java
@@ -29,6 +29,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linear(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -41,6 +42,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDown(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDown(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -53,6 +55,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearObscure(int[] x) {
     int result = 0;
     for (int i = x.length - 1; i >= 0; i--) {
@@ -66,6 +69,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearVeryObscure(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearVeryObscure(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; i++) {
@@ -79,6 +83,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWhile(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWhile(int[] x) {
     int i = 0;
     int result = 0;
@@ -92,6 +97,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearThreeWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -108,6 +114,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearFourWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearFourWayPhi(int[] x) {
     int result = 0;
     for (int i = 0; i < x.length; ) {
@@ -128,6 +135,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinear(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinear(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -143,6 +151,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.wrapAroundThenLinearThreeWayPhi(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int wrapAroundThenLinearThreeWayPhi(int[] x) {
     // Loop with wrap around (length - 1, 0, 1, 2, ..).
     int w = x.length - 1;
@@ -162,6 +171,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearWithParameter(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearWithParameter(int n) {
     int[] x = new int[n];
     for (int i = 0; i < n; i++) {
@@ -174,6 +184,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int[] Main.linearCopy(int[]) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int[] linearCopy(int x[]) {
     int n = x.length;
     int y[] = new int[n];
@@ -183,10 +194,55 @@
     return y;
   }
 
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwo(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwo(int x[]) {
+    int n = x.length / 2;
+    int result = 0;
+    for (int i = 0; i < n; i++) {
+      int ii = i << 1;
+      result += x[ii];
+      result += x[ii + 1];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip1(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip1(int x[]) {
+    int result = 0;
+    for (int i = 0; i < x.length / 2; i++) {
+      result += x[2 * i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.linearByTwoSkip2(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int linearByTwoSkip2(int x[]) {
+    int result = 0;
+    // This case is not optimized.
+    for (int i = 0; i < x.length; i+=2) {
+      result += x[i];
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithCompoundStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithCompoundStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 };
     int result = 0;
@@ -202,6 +258,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargePositiveStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -218,6 +275,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargePositiveStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargePositiveStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -234,6 +292,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithLargeNegativeStride() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -250,6 +309,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearWithVeryLargeNegativeStride() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearWithVeryLargeNegativeStride() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 };
     int result = 0;
@@ -266,6 +326,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -279,6 +340,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearForNEDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearForNEDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -292,6 +354,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileUp() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -306,6 +369,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearDoWhileDown() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearDoWhileDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -320,6 +384,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.linearShort() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int linearShort() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -330,10 +395,160 @@
     return result;
   }
 
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.invariantFromPreLoop(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int invariantFromPreLoop(int[] x, int y) {
+    int result = 0;
+    // Strange pre-loop that sets upper bound.
+    int hi;
+    while (true) {
+      y = y % 3;
+      hi = x.length;
+      if (y != 123) break;
+    }
+    for (int i = 0; i < hi; i++) {
+       result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnTwoArrayLengths(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnTwoArrayLengths(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < b.length; j++) {
+        // Need to know j < b.length < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < b.length for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnOneArrayLength(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnOneArrayLength(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < a.length; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < a.length for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.linearTriangularOnParameter(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void linearTriangularOnParameter(int n) {
+    int[] a = new int[n];
+    for (int i = 0; i < n; i++) {
+      int[] b = new int[i];
+      for (int j = 0; j < i; j++) {
+        // Need to know j < i < n for static bce.
+        a[j] += 1;
+        // Need to know just j < i for static bce.
+        b[j] += 1;
+      }
+      verifyTriangular(a, b, i, n);
+    }
+  }
+
+  // Verifier for triangular methods.
+  private static void verifyTriangular(int[] a, int[] b, int m, int n) {
+    expectEquals(n, a.length);
+    for (int i = 0, k = m; i < n; i++) {
+      expectEquals(a[i], k);
+      if (k > 0) k--;
+    }
+    expectEquals(m, b.length);
+    for (int i = 0; i < m; i++) {
+      expectEquals(b[i], 1);
+    }
+  }
+
+  /// CHECK-START: void Main.bubble(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-START: void Main.bubble(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: ArraySet
+  /// CHECK-DAG: ArraySet
+  /// CHECK-NOT: Deoptimize
+  private static void bubble(int[] a) {
+    for (int i = a.length; --i >= 0;) {
+      for (int j = 0; j < i; j++) {
+        if (a[j] > a[j+1]) {
+          int tmp = a[j];
+          a[j]  = a[j+1];
+          a[j+1] = tmp;
+        }
+      }
+    }
+  }
+
   /// CHECK-START: int Main.periodicIdiom(int) BCE (before)
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicIdiom(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicIdiom(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -350,6 +565,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence2(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence2(int tc) {
     int[] x = { 1, 3 };
     // Loop with periodic sequence (0, 1).
@@ -372,6 +588,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.periodicSequence4(int) BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int periodicSequence4(int tc) {
     int[] x = { 1, 3, 5, 7 };
     // Loop with periodic sequence (0, 1, 2, 3).
@@ -395,6 +612,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -408,6 +626,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -421,6 +640,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightUp3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightUp3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -434,6 +654,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBUp() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBUp() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -448,6 +669,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown1() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown1() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -461,6 +683,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown2() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown2() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -474,6 +697,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justRightDown3() BCE (after)
   /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justRightDown3() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -487,6 +711,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: int Main.justOOBDown() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static int justOOBDown() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int result = 0;
@@ -501,6 +726,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.lowerOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void lowerOOB(int[] x) {
     for (int i = -1; i < x.length; i++) {
       sResult += x[i];
@@ -511,6 +737,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.upperOOB(int[]) BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void upperOOB(int[] x) {
     for (int i = 0; i <= x.length; i++) {
       sResult += x[i];
@@ -521,6 +748,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileUpOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileUpOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = 0;
@@ -533,6 +761,7 @@
   /// CHECK-DAG: BoundsCheck
   /// CHECK-START: void Main.doWhileDownOOB() BCE (after)
   /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: Deoptimize
   private static void doWhileDownOOB() {
     int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
     int i = x.length - 1;
@@ -541,6 +770,306 @@
     } while (-1 <= i);
   }
 
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE1(int[], int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE1(int[] x, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (before)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-START: int Main.linearDynamicBCE2(int[], int, int, int) BCE (after)
+  /// CHECK-DAG: StaticFieldGet
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: StaticFieldSet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  private static int linearDynamicBCE2(int[] x, int lo, int hi, int offset) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      sResult += x[offset + i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.wrapAroundDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int wrapAroundDynamicBCE(int[] x) {
+    int w = 9;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[w];
+      w = i;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.periodicDynamicBCE(int[]) BCE (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  private static int periodicDynamicBCE(int[] x) {
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < 10; i++) {
+      result += x[k];
+      k = 1 - k;
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-NOT: NullCheck
+  /// CHECK-NOT: ArrayLength
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  static int dynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // This loop could be infinite for hi = max int. Since i is also used
+    // as subscript, however, dynamic bce can proceed.
+    int result = 0;
+    for (int i = lo; i <= hi; i++) {
+      result += x[i];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEPossiblyInfiniteLoop(int[], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEPossiblyInfiniteLoop(int[] x, int lo, int hi) {
+    // As above, but now the index is not used as subscript,
+    // and dynamic bce is not applied.
+    int result = 0;
+    for (int k = 0, i = lo; i <= hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.noDynamicBCEMixedInductionTypes(int[], long, long) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: Deoptimize
+  static int noDynamicBCEMixedInductionTypes(int[] x, long lo, long hi) {
+    int result = 0;
+    // Mix of int and long induction.
+    int k = 0;
+    for (long i = lo; i < hi; i++) {
+      result += x[k++];
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (before)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndices(int[], int[][], int, int) BCE (after)
+  /// CHECK-DAG: NullCheck
+  /// CHECK-DAG: ArrayLength
+  /// CHECK-DAG: NotEqual
+  /// CHECK-DAG: If
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: If
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: ArrayGet
+  static int dynamicBCEAndConstantIndices(int[] x, int[][] a, int lo, int hi) {
+    // Deliberately test array length on a before the loop so that only bounds checks
+    // on constant subscripts remain, making them a viable candidate for hoisting.
+    if (a.length == 0) {
+      return -1;
+    }
+    // Loop that allows BCE on x[i].
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += x[i];
+      if ((i % 10) != 0) {
+        // None of the subscripts inside a conditional are removed by dynamic bce,
+        // making them a candidate for deoptimization based on constant indices.
+        // Compiler should ensure the array loads are not subsequently hoisted
+        // "above" the deoptimization "barrier" on the bounds.
+        a[0][i] = 1;
+        a[1][i] = 2;
+        a[99][i] = 3;
+      }
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (before)
+  /// CHECK-DAG: If
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-START: int Main.dynamicBCEAndConstantIndicesAllTypes(int[], boolean[], byte[], char[], short[], int[], long[], float[], double[], java.lang.Integer[], int, int) BCE (after)
+  /// CHECK-DAG: If
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: ArrayGet
+  /// CHECK-DAG: Exit
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: ArrayGet
+  static int dynamicBCEAndConstantIndicesAllTypes(int[] q,
+                                                  boolean[] r,
+                                                  byte[] s,
+                                                  char[] t,
+                                                  short[] u,
+                                                  int[] v,
+                                                  long[] w,
+                                                  float[] x,
+                                                  double[] y,
+                                                  Integer[] z, int lo, int hi) {
+    int result = 0;
+    for (int i = lo; i < hi; i++) {
+      result += q[i] + (r[0] ? 1 : 0) + (int) s[0] + (int) t[0] + (int) u[0] + (int) v[0] +
+                                        (int) w[0] + (int) x[0] + (int) y[0] + (int) z[0];
+    }
+    return result;
+  }
+
   //
   // Verifier.
   //
@@ -596,6 +1125,9 @@
     }
 
     // Linear with non-unit strides.
+    expectEquals(55, linearByTwo(x));
+    expectEquals(25, linearByTwoSkip1(x));
+    expectEquals(25, linearByTwoSkip2(x));
     expectEquals(56, linearWithCompoundStride());
     expectEquals(66, linearWithLargePositiveStride());
     expectEquals(66, linearWithVeryLargePositiveStride());
@@ -608,6 +1140,17 @@
     expectEquals(55, linearDoWhileUp());
     expectEquals(55, linearDoWhileDown());
     expectEquals(55, linearShort());
+    expectEquals(55, invariantFromPreLoop(x, 1));
+    linearTriangularOnTwoArrayLengths(10);
+    linearTriangularOnOneArrayLength(10);
+    linearTriangularOnParameter(10);
+
+    // Sorting.
+    int[] sort = { 5, 4, 1, 9, 10, 2, 7, 6, 3, 8 };
+    bubble(sort);
+    for (int i = 0; i < 10; i++) {
+      expectEquals(sort[i], x[i]);
+    }
 
     // Periodic adds (1, 3), one at the time.
     expectEquals(0, periodicIdiom(-1));
@@ -690,6 +1233,86 @@
       sResult += 1000;
     }
     expectEquals(1055, sResult);
+
+    // Dynamic BCE.
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, -1, x.length);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE1(x, 0, x.length);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE1(x, 0, x.length + 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1055, sResult);
+
+    // Dynamic BCE with offset.
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, -1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1000, sResult);
+    sResult = 0;
+    linearDynamicBCE2(x, 0, x.length, 0);
+    expectEquals(55, sResult);
+    sResult = 0;
+    try {
+      linearDynamicBCE2(x, 0, x.length, 1);
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult += 1000;
+    }
+    expectEquals(1054, sResult);
+
+    // Dynamic BCE candidates.
+    expectEquals(55, wrapAroundDynamicBCE(x));
+    expectEquals(15, periodicDynamicBCE(x));
+    expectEquals(55, dynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEPossiblyInfiniteLoop(x, 0, 9));
+    expectEquals(55, noDynamicBCEMixedInductionTypes(x, 0, 10));
+
+    // Dynamic BCE combined with constant indices.
+    int[][] a;
+    a = new int[0][0];
+    expectEquals(-1, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    a = new int[100][10];
+    expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    for (int i = 0; i < 10; i++) {
+      expectEquals((i % 10) != 0 ? 1 : 0, a[0][i]);
+      expectEquals((i % 10) != 0 ? 2 : 0, a[1][i]);
+      expectEquals((i % 10) != 0 ? 3 : 0, a[99][i]);
+    }
+    a = new int[2][10];
+    sResult = 0;
+    try {
+      expectEquals(55, dynamicBCEAndConstantIndices(x, a, 0, 10));
+    } catch (ArrayIndexOutOfBoundsException e) {
+      sResult = 1;
+    }
+    expectEquals(1, sResult);
+    expectEquals(a[0][1], 1);
+    expectEquals(a[1][1], 2);
+
+    // Dynamic BCE combined with constant indices of all types.
+    boolean[] x1 = { true };
+    byte[] x2 = { 2 };
+    char[] x3 = { 3 };
+    short[] x4 = { 4 };
+    int[] x5 = { 5 };
+    long[] x6 = { 6 };
+    float[] x7 = { 7 };
+    double[] x8 = { 8 };
+    Integer[] x9 = { 9 };
+    expectEquals(505,
+        dynamicBCEAndConstantIndicesAllTypes(x, x1, x2, x3, x4, x5, x6, x7, x8, x9, 0, 10));
   }
 
   private static void expectEquals(int expected, int result) {
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 1b784ae..3f65d5a 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -35,7 +35,7 @@
   }
 
   /// CHECK-START: boolean Main.stringEqualsNull() register (after)
-  /// CHECK:      <<Invoke:z\d+>> InvokeStaticOrDirect
+  /// CHECK:      <<Invoke:z\d+>> InvokeVirtual
   /// CHECK:      Return [<<Invoke>>]
   public static boolean stringEqualsNull() {
     String o = (String)myObject;
@@ -47,7 +47,7 @@
   }
 
   /// CHECK-START-X86: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeVirtual
   /// CHECK-NOT:      test
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
@@ -56,7 +56,7 @@
 
   // Test is very brittle as it depends on the order we emit instructions.
   /// CHECK-START-X86: boolean Main.stringArgumentIsString() disassembly (after)
-  /// CHECK:      InvokeStaticOrDirect
+  /// CHECK:      InvokeVirtual
   /// CHECK:      test
   /// CHECK:      jz/eq
   // Check that we don't try to compare the classes.
diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build
new file mode 100644
index 0000000..18e8c59
--- /dev/null
+++ b/test/551-checker-shifter-operand/build
@@ -0,0 +1,212 @@
+#!/bin/bash
+#
+# Copyright (C) 2008 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This is an almost exact copy of `art/test/etc/default-build`. Only the parsing
+# of `dx` option has been overriden.
+
+# Stop if something fails.
+set -e
+
+# Set default values for directories.
+if [ -d smali ]; then
+  HAS_SMALI=true
+else
+  HAS_SMALI=false
+fi
+
+if [ -d src ]; then
+  HAS_SRC=true
+else
+  HAS_SRC=false
+fi
+
+if [ -d src2 ]; then
+  HAS_SRC2=true
+else
+  HAS_SRC2=false
+fi
+
+if [ -d src-multidex ]; then
+  HAS_SRC_MULTIDEX=true
+else
+  HAS_SRC_MULTIDEX=false
+fi
+
+if [ -d src-ex ]; then
+  HAS_SRC_EX=true
+else
+  HAS_SRC_EX=false
+fi
+
+DX_FLAGS=""
+SKIP_DX_MERGER="false"
+EXPERIMENTAL=""
+
+# Setup experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS
+JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8"
+JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8"
+
+while true; do
+  if [ "x$1" = "x--dx-option" ]; then
+    shift
+    option="$1"
+    # Make sure we run this test *with* `dx` optimizations.
+    if [ "x$option" != "x--no-optimize" ]; then
+      DX_FLAGS="${DX_FLAGS} $option"
+    fi
+    shift
+  elif [ "x$1" = "x--jvm" ]; then
+    shift
+  elif [ "x$1" = "x--no-src" ]; then
+    HAS_SRC=false
+    shift
+  elif [ "x$1" = "x--no-src2" ]; then
+    HAS_SRC2=false
+    shift
+  elif [ "x$1" = "x--no-src-multidex" ]; then
+    HAS_SRC_MULTIDEX=false
+    shift
+  elif [ "x$1" = "x--no-src-ex" ]; then
+    HAS_SRC_EX=false
+    shift
+  elif [ "x$1" = "x--no-smali" ]; then
+    HAS_SMALI=false
+    shift
+  elif [ "x$1" = "x--experimental" ]; then
+    shift
+    EXPERIMENTAL="${EXPERIMENTAL} $1"
+    shift
+  elif expr "x$1" : "x--" >/dev/null 2>&1; then
+    echo "unknown $0 option: $1" 1>&2
+    exit 1
+  else
+    break
+  fi
+done
+
+# Add args from the experimental mappings.
+for experiment in ${EXPERIMENTAL}; do
+  JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}"
+done
+
+if [ -e classes.dex ]; then
+  zip $TEST_NAME.jar classes.dex
+  exit 0
+fi
+
+if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then
+  # No src directory? Then forget about trying to run dx.
+  SKIP_DX_MERGER="true"
+fi
+
+if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+  # Jack does not support this configuration unless we specify how to partition the DEX file
+  # with a .jpp file.
+  USE_JACK="false"
+fi
+
+if [ ${USE_JACK} = "true" ]; then
+  # Jack toolchain
+  if [ "${HAS_SRC}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src.jack src
+    imported_jack_files="--import src.jack"
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} --output-jack src2.jack src2
+    imported_jack_files="--import src2.jack ${imported_jack_files}"
+  fi
+
+  # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider
+  # class definitions from src2 first.
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex .
+  fi
+else
+  # Legacy toolchain with javac+dx
+  if [ "${HAS_SRC}" = "true" ]; then
+    mkdir classes
+    ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then
+    mkdir classes2
+    ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \
+        --dump-width=1000 ${DX_FLAGS} classes2
+    fi
+  fi
+
+  if [ "${HAS_SRC2}" = "true" ]; then
+    mkdir -p classes
+    ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'`
+  fi
+
+  if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then
+    if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \
+        --dump-width=1000 ${DX_FLAGS} classes
+    fi
+  fi
+fi
+
+if [ "${HAS_SMALI}" = "true" ]; then
+  # Compile Smali classes
+  ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'`
+
+  # Don't bother with dexmerger if we provide our own main function in a smali file.
+  if [ ${SKIP_DX_MERGER} = "false" ]; then
+    ${DXMERGER} classes.dex classes.dex smali_classes.dex
+  else
+    mv smali_classes.dex classes.dex
+  fi
+fi
+
+if [ ${HAS_SRC_EX} = "true" ]; then
+  if [ ${USE_JACK} = "true" ]; then
+      # Rename previous "classes.dex" so it is not overwritten.
+      mv classes.dex classes-1.dex
+      #TODO find another way to append src.jack to the jack classpath
+      ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex
+      zip $TEST_NAME-ex.jar classes.dex
+      # Restore previous "classes.dex" so it can be zipped.
+      mv classes-1.dex classes.dex
+  else
+    mkdir classes-ex
+    ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'`
+    if [ ${NEED_DEX} = "true" ]; then
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \
+        --dump-width=1000 ${DX_FLAGS} classes-ex
+
+      # quick shuffle so that the stored name is "classes.dex"
+      mv classes.dex classes-1.dex
+      mv classes-ex.dex classes.dex
+      zip $TEST_NAME-ex.jar classes.dex
+      mv classes.dex classes-ex.dex
+      mv classes-1.dex classes.dex
+    fi
+  fi
+fi
+
+# Create a single jar with two dex files for multidex.
+if [ ${HAS_SRC_MULTIDEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex classes2.dex
+elif [ ${NEED_DEX} = "true" ]; then
+  zip $TEST_NAME.jar classes.dex
+fi
diff --git a/test/551-checker-shifter-operand/expected.txt b/test/551-checker-shifter-operand/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-checker-shifter-operand/expected.txt
diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt
new file mode 100644
index 0000000..10e998c
--- /dev/null
+++ b/test/551-checker-shifter-operand/info.txt
@@ -0,0 +1 @@
+Test the merging of instructions into the shifter operand on arm64.
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
new file mode 100644
index 0000000..decdd1f
--- /dev/null
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -0,0 +1,678 @@
+/*
+* Copyright (C) 2015 The Android Open Source Project
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+public class Main {
+
+  // A dummy value to defeat inlining of these routines.
+  static boolean doThrow = false;
+
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // Non-inlinable type-casting helpers.
+  static  char $noinline$byteToChar   (byte v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$byteToShort  (byte v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$byteToInt    (byte v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$byteToLong   (byte v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$charToByte   (char v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static short $noinline$charToShort  (char v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$charToInt    (char v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$charToLong   (char v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return  (char)v; }
+  static   int $noinline$shortToInt  (short v) { if (doThrow) throw new Error(); return   (int)v; }
+  static  long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$intToByte     (int v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$intToChar     (int v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$intToShort    (int v) { if (doThrow) throw new Error(); return (short)v; }
+  static  long $noinline$intToLong     (int v) { if (doThrow) throw new Error(); return  (long)v; }
+  static  byte $noinline$longToByte   (long v) { if (doThrow) throw new Error(); return  (byte)v; }
+  static  char $noinline$longToChar   (long v) { if (doThrow) throw new Error(); return  (char)v; }
+  static short $noinline$longToShort  (long v) { if (doThrow) throw new Error(); return (short)v; }
+  static   int $noinline$longToInt    (long v) { if (doThrow) throw new Error(); return   (int)v; }
+
+  /**
+   * Basic test merging a bitfield move operation (here a type conversion) into
+   * the shifter operand.
+   */
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:       <<tmp:j\d+>>         TypeConversion [<<b>>]
+  /// CHECK:                            Sub [<<l>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<l:j\d+>>           ParameterValue
+  /// CHECK-DAG:   <<b:b\d+>>           ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+  /// CHECK-NOT:                        Sub
+
+  /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after)
+  /// CHECK:                            sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb
+
+  public static long $opt$noinline$translate(long l, byte b) {
+    if (doThrow) throw new Error();
+    long tmp = (long)b;
+    return l - tmp;
+  }
+
+
+  /**
+   * Test that we do not merge into the shifter operand when the left and right
+   * inputs are the the IR.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<a:i\d+>>           ParameterValue
+  /// CHECK:       <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<tmp>>,<<tmp>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:   <<a:i\d+>>           ParameterValue
+  /// CHECK-DAG:   <<Const2:i\d+>>      IntConstant 2
+  /// CHECK:       <<Shl:i\d+>>         Shl [<<a>>,<<Const2>>]
+  /// CHECK:                            Add [<<Shl>>,<<Shl>>]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static int $opt$noinline$sameInput(int a) {
+    if (doThrow) throw new Error();
+    int tmp = a << 2;
+    return tmp + tmp;
+  }
+
+  /**
+   * Check that we perform the merge for multiple uses.
+   */
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:       <<Const23:i\d+>>     IntConstant 23
+  /// CHECK:       <<tmp:i\d+>>         Shl [<<arg>>,<<Const23>>]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+  /// CHECK:                            Add [<<tmp>>,{{i\d+}}]
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK:       <<arg:i\d+>>         ParameterValue
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+  /// CHECK:                            Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23
+
+  /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Add
+
+  public static int $opt$noinline$multipleUses(int arg) {
+    if (doThrow) throw new Error();
+    int tmp = arg << 23;
+    switch (arg) {
+      case 1:  return (arg | 1) + tmp;
+      case 2:  return (arg | 2) + tmp;
+      case 3:  return (arg | 3) + tmp;
+      case 4:  return (arg | 4) + tmp;
+      case (1 << 20):  return (arg | 5) + tmp;
+      default: return 0;
+    }
+  }
+
+  /**
+   * Logical instructions cannot take 'extend' operations into the shift
+   * operand, so test that only the shifts are merged.
+   */
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
+  /// CHECK:                            and lsl
+  /// CHECK:                            sxtb
+  /// CHECK:                            and
+
+  static void $opt$noinline$testAnd(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)),
+                     (a & (b << 5)) | (a & (byte)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after)
+  /// CHECK:                            orr asr
+  /// CHECK:                            uxth
+  /// CHECK:                            orr
+
+  static void $opt$noinline$testOr(int a, int b) {
+    if (doThrow) throw new Error();
+    assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)),
+                    (a | (b >> 6)) | (a | (char)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after)
+  /// CHECK:                            eor lsr
+  /// CHECK:                            sxtw
+  /// CHECK:                            eor
+
+  static void $opt$noinline$testXor(long a, long b) {
+    if (doThrow) throw new Error();
+    assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)),
+                     (a ^ (b >>> 7)) | (a ^ (int)b));
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after)
+  /// CHECK:                            neg lsl
+  /// CHECK:                            sxth
+  /// CHECK:                            neg
+
+  static void $opt$noinline$testNeg(int a) {
+    if (doThrow) throw new Error();
+    assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a),
+                    (-(a << 8)) | (-(short)a));
+  }
+
+  /**
+   * The functions below are used to compare the result of optimized operations
+   * to non-optimized operations.
+   * On the left-hand side we use a non-inlined function call to ensure the
+   * optimization does not occur. The checker tests ensure that the optimization
+   * does occur on the right-hand.
+   */
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteInt1(int a, byte b) {
+    assertIntEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertIntEquals(a + $noinline$byteToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendByteInt2(int a, byte b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$byteToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$byteToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendByteLong(long a, byte b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$byteToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$byteToShort(b), a + (short)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$byteToInt  (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendByte(long a, byte b) {
+    $opt$validateExtendByteInt1((int)a, b);
+    $opt$validateExtendByteInt2((int)a, b);
+    $opt$validateExtendByteLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharInt1(int a, char b) {
+    assertIntEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertIntEquals(a + $noinline$charToShort(b), a + (short)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendCharInt2(int a, char b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$charToInt (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$charToLong(b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendCharLong(long a, char b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$charToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$charToShort(b), a + (short)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$charToInt  (b), a +   (int)b);
+  }
+
+  public static void $opt$validateExtendChar(long a, char b) {
+    $opt$validateExtendCharInt1((int)a, b);
+    $opt$validateExtendCharInt2((int)a, b);
+    $opt$validateExtendCharLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortInt1(int a, short b) {
+    assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b);
+    assertIntEquals(a + $noinline$shortToChar (b), a + (char)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+  /// CHECK-NOT:                        Arm64DataProcWithShifterOp
+
+  public static void $opt$validateExtendShortInt2(int a, short b) {
+    // The conversion to `int` has been optimized away, so there is nothing to merge.
+    assertIntEquals (a + $noinline$shortToInt  (b), a +  (int)b);
+    // There is an environment use for `(long)b`, preventing the merge.
+    assertLongEquals(a + $noinline$shortToLong (b), a + (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendShortLong(long a, short b) {
+    // The first two tests have a type conversion.
+    assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b);
+    assertLongEquals(a + $noinline$shortToChar(b), a + (char)b);
+    // This test does not because the conversion to `int` is optimized away.
+    assertLongEquals(a + $noinline$shortToInt (b), a +  (int)b);
+  }
+
+  public static void $opt$validateExtendShort(long a, short b) {
+    $opt$validateExtendShortInt1((int)a, b);
+    $opt$validateExtendShortInt2((int)a, b);
+    $opt$validateExtendShortLong(a, b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendInt(long a, int b) {
+    // All tests have a conversion to `long`. The first three tests also have a
+    // conversion from `int` to the specified type. For each test the conversion
+    // to `long` is merged into the shifter operand.
+    assertLongEquals(a + $noinline$intToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$intToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$intToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$intToLong (b), a +  (long)b);
+  }
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK:                            TypeConversion
+  /// CHECK-NOT:                        TypeConversion
+
+  public static void $opt$validateExtendLong(long a, long b) {
+    // Each test has two conversions, from `long` and then back to `long`. The
+    // conversions to `long` are merged.
+    assertLongEquals(a + $noinline$longToByte (b), a +  (byte)b);
+    assertLongEquals(a + $noinline$longToChar (b), a +  (char)b);
+    assertLongEquals(a + $noinline$longToShort(b), a + (short)b);
+    assertLongEquals(a + $noinline$longToInt  (b), a +   (int)b);
+  }
+
+
+  static int $noinline$IntShl(int b, int c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static int $noinline$IntShr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static int $noinline$IntUshr(int b, int c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftInt(int a, int b) {
+    assertIntEquals(a + $noinline$IntShl(b, 1),   a + (b <<  1));
+    assertIntEquals(a + $noinline$IntShl(b, 6),   a + (b <<  6));
+    assertIntEquals(a + $noinline$IntShl(b, 7),   a + (b <<  7));
+    assertIntEquals(a + $noinline$IntShl(b, 8),   a + (b <<  8));
+    assertIntEquals(a + $noinline$IntShl(b, 14),  a + (b << 14));
+    assertIntEquals(a + $noinline$IntShl(b, 15),  a + (b << 15));
+    assertIntEquals(a + $noinline$IntShl(b, 16),  a + (b << 16));
+    assertIntEquals(a + $noinline$IntShl(b, 30),  a + (b << 30));
+    assertIntEquals(a + $noinline$IntShl(b, 31),  a + (b << 31));
+    assertIntEquals(a + $noinline$IntShl(b, 32),  a + (b << 32));
+    assertIntEquals(a + $noinline$IntShl(b, 62),  a + (b << 62));
+    assertIntEquals(a + $noinline$IntShl(b, 63),  a + (b << 63));
+
+    assertIntEquals(a - $noinline$IntShr(b, 1),   a - (b >>  1));
+    assertIntEquals(a - $noinline$IntShr(b, 6),   a - (b >>  6));
+    assertIntEquals(a - $noinline$IntShr(b, 7),   a - (b >>  7));
+    assertIntEquals(a - $noinline$IntShr(b, 8),   a - (b >>  8));
+    assertIntEquals(a - $noinline$IntShr(b, 14),  a - (b >> 14));
+    assertIntEquals(a - $noinline$IntShr(b, 15),  a - (b >> 15));
+    assertIntEquals(a - $noinline$IntShr(b, 16),  a - (b >> 16));
+    assertIntEquals(a - $noinline$IntShr(b, 30),  a - (b >> 30));
+    assertIntEquals(a - $noinline$IntShr(b, 31),  a - (b >> 31));
+    assertIntEquals(a - $noinline$IntShr(b, 32),  a - (b >> 32));
+    assertIntEquals(a - $noinline$IntShr(b, 62),  a - (b >> 62));
+    assertIntEquals(a - $noinline$IntShr(b, 63),  a - (b >> 63));
+
+    assertIntEquals(a ^ $noinline$IntUshr(b, 1),   a ^ (b >>>  1));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 6),   a ^ (b >>>  6));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 7),   a ^ (b >>>  7));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 8),   a ^ (b >>>  8));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 14),  a ^ (b >>> 14));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 15),  a ^ (b >>> 15));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 16),  a ^ (b >>> 16));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 30),  a ^ (b >>> 30));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 31),  a ^ (b >>> 31));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 32),  a ^ (b >>> 32));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 62),  a ^ (b >>> 62));
+    assertIntEquals(a ^ $noinline$IntUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  static long $noinline$LongShl(long b, long c) {
+    if (doThrow) throw new Error();
+    return b << c;
+  }
+  static long $noinline$LongShr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >> c;
+  }
+  static long $noinline$LongUshr(long b, long c) {
+    if (doThrow) throw new Error();
+    return b >>> c;
+  }
+
+  // Each test line below should see one merge.
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+  /// CHECK:                            Arm64DataProcWithShifterOp
+
+  /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after)
+  /// CHECK-NOT:                        Shl
+  /// CHECK-NOT:                        Shr
+  /// CHECK-NOT:                        UShr
+
+  public static void $opt$validateShiftLong(long a, long b) {
+    assertLongEquals(a + $noinline$LongShl(b, 1),   a + (b <<  1));
+    assertLongEquals(a + $noinline$LongShl(b, 6),   a + (b <<  6));
+    assertLongEquals(a + $noinline$LongShl(b, 7),   a + (b <<  7));
+    assertLongEquals(a + $noinline$LongShl(b, 8),   a + (b <<  8));
+    assertLongEquals(a + $noinline$LongShl(b, 14),  a + (b << 14));
+    assertLongEquals(a + $noinline$LongShl(b, 15),  a + (b << 15));
+    assertLongEquals(a + $noinline$LongShl(b, 16),  a + (b << 16));
+    assertLongEquals(a + $noinline$LongShl(b, 30),  a + (b << 30));
+    assertLongEquals(a + $noinline$LongShl(b, 31),  a + (b << 31));
+    assertLongEquals(a + $noinline$LongShl(b, 32),  a + (b << 32));
+    assertLongEquals(a + $noinline$LongShl(b, 62),  a + (b << 62));
+    assertLongEquals(a + $noinline$LongShl(b, 63),  a + (b << 63));
+
+    assertLongEquals(a - $noinline$LongShr(b, 1),   a - (b >>  1));
+    assertLongEquals(a - $noinline$LongShr(b, 6),   a - (b >>  6));
+    assertLongEquals(a - $noinline$LongShr(b, 7),   a - (b >>  7));
+    assertLongEquals(a - $noinline$LongShr(b, 8),   a - (b >>  8));
+    assertLongEquals(a - $noinline$LongShr(b, 14),  a - (b >> 14));
+    assertLongEquals(a - $noinline$LongShr(b, 15),  a - (b >> 15));
+    assertLongEquals(a - $noinline$LongShr(b, 16),  a - (b >> 16));
+    assertLongEquals(a - $noinline$LongShr(b, 30),  a - (b >> 30));
+    assertLongEquals(a - $noinline$LongShr(b, 31),  a - (b >> 31));
+    assertLongEquals(a - $noinline$LongShr(b, 32),  a - (b >> 32));
+    assertLongEquals(a - $noinline$LongShr(b, 62),  a - (b >> 62));
+    assertLongEquals(a - $noinline$LongShr(b, 63),  a - (b >> 63));
+
+    assertLongEquals(a ^ $noinline$LongUshr(b, 1),   a ^ (b >>>  1));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 6),   a ^ (b >>>  6));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 7),   a ^ (b >>>  7));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 8),   a ^ (b >>>  8));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 14),  a ^ (b >>> 14));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 15),  a ^ (b >>> 15));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 16),  a ^ (b >>> 16));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 30),  a ^ (b >>> 30));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 31),  a ^ (b >>> 31));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 32),  a ^ (b >>> 32));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 62),  a ^ (b >>> 62));
+    assertLongEquals(a ^ $noinline$LongUshr(b, 63),  a ^ (b >>> 63));
+  }
+
+
+  public static void main(String[] args) {
+    assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3));
+    assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3));
+
+    assertIntEquals(4096, $opt$noinline$sameInput(512));
+    assertIntEquals(-8192, $opt$noinline$sameInput(-1024));
+
+    assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1));
+    assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20));
+
+    long inputs[] = {
+      -((1L <<  7) - 1L), -((1L <<  7)), -((1L <<  7) + 1L),
+      -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L),
+      -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L),
+      -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L),
+      -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L),
+      -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L),
+      -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L,
+      -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L,
+      0L,
+      1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L,
+      42L,  314L,  2718281828L,  0x123456789L,  0x987654321L,
+      (1L <<  7) - 1L, (1L <<  7), (1L <<  7) + 1L,
+      (1L <<  8) - 1L, (1L <<  8), (1L <<  8) + 1L,
+      (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L,
+      (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L,
+      (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L,
+      (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L,
+      (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L,
+      Long.MIN_VALUE, Long.MAX_VALUE
+    };
+    for (int i = 0; i < inputs.length; i++) {
+      $opt$noinline$testNeg((int)inputs[i]);
+      for (int j = 0; j < inputs.length; j++) {
+        $opt$noinline$testAnd(inputs[i], inputs[j]);
+        $opt$noinline$testOr((int)inputs[i], (int)inputs[j]);
+        $opt$noinline$testXor(inputs[i], inputs[j]);
+
+        $opt$validateExtendByte(inputs[i], (byte)inputs[j]);
+        $opt$validateExtendChar(inputs[i], (char)inputs[j]);
+        $opt$validateExtendShort(inputs[i], (short)inputs[j]);
+        $opt$validateExtendInt(inputs[i], (int)inputs[j]);
+        $opt$validateExtendLong(inputs[i], inputs[j]);
+
+        $opt$validateShiftInt((int)inputs[i], (int)inputs[j]);
+        $opt$validateShiftLong(inputs[i], inputs[j]);
+      }
+    }
+
+  }
+}
diff --git a/test/551-invoke-super/expected.txt b/test/551-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/551-invoke-super/expected.txt
diff --git a/test/551-invoke-super/info.txt b/test/551-invoke-super/info.txt
new file mode 100644
index 0000000..864ddfe
--- /dev/null
+++ b/test/551-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when resolving to an abstract method.
diff --git a/test/551-invoke-super/smali/invokesuper.smali b/test/551-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/551-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/551-invoke-super/smali/superclass.smali b/test/551-invoke-super/smali/superclass.smali
new file mode 100644
index 0000000..47fbee7
--- /dev/null
+++ b/test/551-invoke-super/smali/superclass.smali
@@ -0,0 +1,26 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method abstract public returnInt()I
+.end method
diff --git a/test/551-invoke-super/src/Main.java b/test/551-invoke-super/src/Main.java
new file mode 100644
index 0000000..3a30184
--- /dev/null
+++ b/test/551-invoke-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected AbstractMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof AbstractMethodError)) {
+        throw new Error("Expected AbstractMethodError");
+      }
+    }
+  }
+}
diff --git a/test/552-checker-sharpening/expected.txt b/test/552-checker-sharpening/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-checker-sharpening/expected.txt
diff --git a/test/552-checker-sharpening/info.txt b/test/552-checker-sharpening/info.txt
new file mode 100644
index 0000000..c84539c
--- /dev/null
+++ b/test/552-checker-sharpening/info.txt
@@ -0,0 +1 @@
+Tests for sharpening.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
new file mode 100644
index 0000000..d50edd8
--- /dev/null
+++ b/test/552-checker-sharpening/src/Main.java
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static boolean doThrow = false;
+
+  private static int $noinline$foo(int x) {
+    if (doThrow) { throw new Error(); }
+    return x;
+  }
+
+  /// CHECK-START: int Main.testSimple(int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  public static int testSimple(int x) {
+    // This call should use PC-relative dex cache array load to retrieve the target method.
+    return $noinline$foo(x);
+  }
+
+  /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NEXT:           If
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           If
+
+  public static int testDiamond(boolean negate, int x) {
+    // These calls should use PC-relative dex cache array loads to retrieve the target method.
+    // PC-relative bases used by X86 and ARM should be pulled before the If.
+    if (negate) {
+      return $noinline$foo(-x);
+    } else {
+      return $noinline$foo(x);
+    }
+  }
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                X86ComputeBaseMethodAddress
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+
+  public static int testLoop(int[] array, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop.
+    for (int i : array) {
+      x += $noinline$foo(i);
+    }
+    return x;
+  }
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
+  /// CHECK-NOT:            X86ComputeBaseMethodAddress
+
+  /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           X86ComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
+  /// CHECK-NOT:            ArmDexCacheArraysBase
+
+  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           ArmDexCacheArraysBase
+  /// CHECK-NEXT:           Goto
+
+  public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
+    // PC-relative bases used by X86 and ARM should be pulled before the loop
+    // but not outside the if.
+    if (array != null) {
+      for (int i : array) {
+        if (negate) {
+          x += $noinline$foo(-i);
+        } else {
+          x += $noinline$foo(i);
+        }
+      }
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(1, testSimple(1));
+    assertIntEquals(1, testDiamond(false, 1));
+    assertIntEquals(-1, testDiamond(true, 1));
+    assertIntEquals(3, testLoop(new int[]{ 2 }, 1));
+    assertIntEquals(8, testLoop(new int[]{ 3, 4 }, 1));
+    assertIntEquals(1, testLoopWithDiamond(null, false, 1));
+    assertIntEquals(3, testLoopWithDiamond(new int[]{ 2 }, false, 1));
+    assertIntEquals(-6, testLoopWithDiamond(new int[]{ 3, 4 }, true, 1));
+  }
+}
diff --git a/test/552-invoke-non-existent-super/expected.txt b/test/552-invoke-non-existent-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/552-invoke-non-existent-super/expected.txt
diff --git a/test/552-invoke-non-existent-super/info.txt b/test/552-invoke-non-existent-super/info.txt
new file mode 100644
index 0000000..c5428d4
--- /dev/null
+++ b/test/552-invoke-non-existent-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode when the super class does not have the method.
diff --git a/test/552-invoke-non-existent-super/smali/invokesuper.smali b/test/552-invoke-non-existent-super/smali/invokesuper.smali
new file mode 100644
index 0000000..ad3c218
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on a non-super class to force complex resolution.
+    invoke-super {v1}, LInvokeSuper;->returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/552-invoke-non-existent-super/smali/superclass.smali b/test/552-invoke-non-existent-super/smali/superclass.smali
new file mode 100644
index 0000000..21d961e
--- /dev/null
+++ b/test/552-invoke-non-existent-super/smali/superclass.smali
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class abstract public LSuperClass;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
diff --git a/test/552-invoke-non-existent-super/src/Main.java b/test/552-invoke-non-existent-super/src/Main.java
new file mode 100644
index 0000000..c264471
--- /dev/null
+++ b/test/552-invoke-non-existent-super/src/Main.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    try {
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance(), new Object[0]);
+      throw new Error("Expected NoSuchMethodError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof NoSuchMethodError)) {
+        throw new Error("Expected NoSuchMethodError");
+      }
+    }
+  }
+}
diff --git a/test/553-invoke-super/expected.txt b/test/553-invoke-super/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/553-invoke-super/expected.txt
diff --git a/test/553-invoke-super/info.txt b/test/553-invoke-super/info.txt
new file mode 100644
index 0000000..ad99030
--- /dev/null
+++ b/test/553-invoke-super/info.txt
@@ -0,0 +1 @@
+Tests the invoke-super opcode.
diff --git a/test/553-invoke-super/smali/invokesuper.smali b/test/553-invoke-super/smali/invokesuper.smali
new file mode 100644
index 0000000..a6f9b4e
--- /dev/null
+++ b/test/553-invoke-super/smali/invokesuper.smali
@@ -0,0 +1,40 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+.class public LInvokeSuper;
+.super LSuperClass;
+
+.method public constructor <init>()V
+.registers 1
+    invoke-direct {v0}, LSuperClass;-><init>()V
+    return-void
+.end method
+
+
+.method public run()I
+.registers 2
+    # Do an invoke super on this class, to confuse runtime/compiler.
+    invoke-super {v1}, LInvokeSuper;->$noinline$returnInt()I
+    move-result v0
+    return v0
+.end method
+
+
+.method public $noinline$returnInt()I
+.registers 2
+    const v0, 777
+    return v0
+.end method
diff --git a/test/553-invoke-super/src/Main.java b/test/553-invoke-super/src/Main.java
new file mode 100644
index 0000000..91d2394
--- /dev/null
+++ b/test/553-invoke-super/src/Main.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  static void assertEquals(int expected, int value) {
+    if (expected != value) {
+      throw new Error("Expected " + expected + ", got " + value);
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("InvokeSuper");
+    Method m = c.getMethod("run");
+    assertEquals(42, ((Integer)m.invoke(c.newInstance(), new Object[0])).intValue());
+  }
+}
diff --git a/test/553-invoke-super/src/SuperClass.java b/test/553-invoke-super/src/SuperClass.java
new file mode 100644
index 0000000..36ce093
--- /dev/null
+++ b/test/553-invoke-super/src/SuperClass.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class SuperClass {
+  boolean doThrow = false;
+
+  public int $noinline$returnInt() {
+    if (doThrow) {
+      throw new Error();
+    }
+    return 42;
+  }
+}
diff --git a/test/555-UnsafeGetLong-regression/expected.txt b/test/555-UnsafeGetLong-regression/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/555-UnsafeGetLong-regression/info.txt b/test/555-UnsafeGetLong-regression/info.txt
new file mode 100644
index 0000000..0e16ed7
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/info.txt
@@ -0,0 +1,2 @@
+Regression test for sun.misc.Unsafe.getLong's intrinsic's locations
+not handled properly.
diff --git a/test/555-UnsafeGetLong-regression/src/Main.java b/test/555-UnsafeGetLong-regression/src/Main.java
new file mode 100644
index 0000000..1adafae
--- /dev/null
+++ b/test/555-UnsafeGetLong-regression/src/Main.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import sun.misc.Unsafe;
+
+public class Main {
+  private static void assertLongEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static Unsafe getUnsafe() throws Exception {
+    Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+    Field f = unsafeClass.getDeclaredField("theUnsafe");
+    f.setAccessible(true);
+    return (Unsafe) f.get(null);
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    Unsafe unsafe = getUnsafe();
+
+    testUnsafeGetLong(unsafe);
+  }
+
+  public static void testUnsafeGetLong(Unsafe unsafe) throws Exception {
+    TestClass test = new TestClass();
+    Field longField = TestClass.class.getDeclaredField("longVar");
+    long lvar = unsafe.objectFieldOffset(longField);
+    lvar = unsafe.getLong(test, lvar);
+    assertLongEquals(1122334455667788L, lvar);
+  }
+
+  private static class TestClass {
+    public long longVar = 1122334455667788L;
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index a590cf1..ebefeea 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -47,4 +47,5 @@
 b/23502994 (if-eqz)
 b/23502994 (check-cast)
 b/25494456
+b/21869691
 Done!
diff --git a/test/800-smali/smali/b_21869691A.smali b/test/800-smali/smali/b_21869691A.smali
new file mode 100644
index 0000000..a7a6ef4
--- /dev/null
+++ b/test/800-smali/smali/b_21869691A.smali
@@ -0,0 +1,47 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode.
+#
+# When using invoke-interface on a method id that is not from an interface class, we should throw
+# an IncompatibleClassChangeError. FindInterfaceMethod assumes that the given type is an interface,
+# so we can construct a class hierarchy that would have a surprising result:
+#
+#   interface I {
+#     void a();
+#   }
+#
+#   class B implements I {
+#      // miranda method for a, or a implemented.
+#   }
+#
+#   class C extends B {
+#   }
+#
+# Then calling invoke-interface C.a() will go wrong if there is no explicit check: a can't be found
+# in C, but in the interface table, so we will find an interface method and pass ICCE checks.
+#
+# If we do this before a correct invoke-virtual C.a(), we poison the dex cache with an incorrect
+# method. In this test, this is done in A (A < B, so processed first). The "real" call is in B.
+
+.class public LB21869691A;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public run()V
+  .registers 3
+  new-instance v0, LB21869691C;
+  invoke-direct {v0}, LB21869691C;-><init>()V
+  invoke-virtual {v2, v0}, LB21869691A;->callinf(LB21869691C;)V
+  return-void
+.end method
+
+.method public callinf(LB21869691C;)V
+  .registers 2
+  invoke-interface {p1}, LB21869691C;->a()V
+  return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691B.smali b/test/800-smali/smali/b_21869691B.smali
new file mode 100644
index 0000000..1172bdb
--- /dev/null
+++ b/test/800-smali/smali/b_21869691B.smali
@@ -0,0 +1,33 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode. See b_21869691A.smali for explanation.
+
+.class public abstract LB21869691B;
+
+.super Ljava/lang/Object;
+.implements LB21869691I;
+
+.method protected constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+# Have an implementation for the interface method.
+.method public a()V
+  .registers 1
+  return-void
+.end method
+
+# Call ourself with invoke-virtual.
+.method public callB()V
+  .registers 1
+  invoke-virtual {p0}, LB21869691B;->a()V
+  return-void
+.end method
+
+# Call C with invoke-virtual.
+.method public callB(LB21869691C;)V
+  .registers 2
+  invoke-virtual {p1}, LB21869691C;->a()V
+  return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691C.smali b/test/800-smali/smali/b_21869691C.smali
new file mode 100644
index 0000000..4f89a04
--- /dev/null
+++ b/test/800-smali/smali/b_21869691C.smali
@@ -0,0 +1,12 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode. See b_21869691A.smali for explanation.
+
+.class public LB21869691C;
+
+.super LB21869691B;
+
+.method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, LB21869691B;-><init>()V
+    return-void
+.end method
diff --git a/test/800-smali/smali/b_21869691I.smali b/test/800-smali/smali/b_21869691I.smali
new file mode 100644
index 0000000..72a27dd
--- /dev/null
+++ b/test/800-smali/smali/b_21869691I.smali
@@ -0,0 +1,11 @@
+# Test that the verifier does not stash methods incorrectly because they are being invoked with
+# the wrong opcode.
+#
+# This is the interface class that has an "a" method.
+
+.class public abstract interface LB21869691I;
+
+.super Ljava/lang/Object;
+
+.method public abstract a()V
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 4844848..3b62a46 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -139,6 +139,8 @@
                 new Object[] { "abc" }, null, null));
         testCases.add(new TestCase("b/25494456", "B25494456", "run", null, new VerifyError(),
                 null));
+        testCases.add(new TestCase("b/21869691", "B21869691A", "run", null,
+                new IncompatibleClassChangeError(), null));
     }
 
     public void runTests() {
@@ -208,7 +210,7 @@
                                                         tc.expectedException.getClass().getName() +
                                                         ", but got " + exc.getClass(), exc);
             } else {
-              // Expected exception, do nothing.
+                // Expected exception, do nothing.
             }
         } finally {
             if (errorReturn != null) {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index c830ad4..0925d36 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -258,8 +258,10 @@
 
 TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
 
+# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat
+  117-nopatchoat \
+  554-jit-profile-file
 
 ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
diff --git a/test/run-test b/test/run-test
index d0da34e..6e13b8a 100755
--- a/test/run-test
+++ b/test/run-test
@@ -669,9 +669,9 @@
 # -------------------------------
 # Return whether the Optimizing compiler has read barrier support for ARCH.
 function arch_supports_read_barrier() {
-  # Optimizing has read barrier support for ARM, x86 and x86-64 at the
+  # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the
   # moment.
-  [ "x$1" = xarm ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
+  [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
 }
 
 # Tests named '<number>-checker-*' will also have their CFGs verified with
@@ -739,8 +739,8 @@
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=16384
-  run_file_size_limit=16384
+  build_file_size_limit=24576
+  run_file_size_limit=24576
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 362ae25..adc4d03 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -19,7 +19,6 @@
  * Show site context and heap and class filter in "Objects" view?
  * Have a menu at the top of an object view with links to the sections?
  * Include ahat version and hprof file in the menu at the top of the page?
- * Show root types.
  * Heaped Table
    - Make sortable by clicking on headers.
  * For HeapTable with single heap shown, the heap name isn't centered?
@@ -77,6 +76,7 @@
  * Extracting bitmap data from bitmap instances.
  * Adding up allocations by stack frame.
  * Computing, for each instance, the other instances it dominates.
+ * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
  0.2 Oct 20, 2015
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index 0bf064e..fc7911b 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -19,6 +19,8 @@
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import com.android.tools.perflib.heap.Snapshot;
 import com.android.tools.perflib.heap.StackFrame;
 import com.android.tools.perflib.heap.StackTrace;
@@ -29,8 +31,10 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
@@ -48,6 +52,11 @@
   // Collection of objects whose immediate dominator is the SENTINEL_ROOT.
   private List<Instance> mRooted;
 
+  // Map from roots to their types.
+  // Instances are only included if they are roots, and the collection of root
+  // types is guaranteed to be non-empty.
+  private Map<Instance, Collection<RootType>> mRoots;
+
   private Site mRootSite;
   private Map<Heap, Long> mHeapSizes;
 
@@ -113,6 +122,18 @@
       }
       mHeapSizes.put(heap, total);
     }
+
+    // Record the roots and their types.
+    mRoots = new HashMap<Instance, Collection<RootType>>();
+    for (RootObj root : snapshot.getGCRoots()) {
+      Instance inst = root.getReferredInstance();
+      Collection<RootType> types = mRoots.get(inst);
+      if (types == null) {
+        types = new HashSet<RootType>();
+        mRoots.put(inst, types);
+      }
+      types.add(root.getRootType());
+    }
   }
 
   // Note: This method is exposed for testing purposes.
@@ -140,6 +161,21 @@
     return mRooted;
   }
 
+  /**
+   * Returns true if the given instance is a root.
+   */
+  public boolean isRoot(Instance inst) {
+    return mRoots.containsKey(inst);
+  }
+
+  /**
+   * Returns the list of root types for the given instance, or null if the
+   * instance is not a root.
+   */
+  public Collection<RootType> getRootTypes(Instance inst) {
+    return mRoots.get(inst);
+  }
+
   public List<Heap> getHeaps() {
     return mHeaps;
   }
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index 34a5665..7a673f5 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -71,7 +71,7 @@
         }
 
         public DocString render(Instance element) {
-          return Value.render(element);
+          return Value.render(mSnapshot, element);
         }
       };
       return Collections.singletonList(value);
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 1305070..06023da 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -23,9 +23,11 @@
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
+import com.android.tools.perflib.heap.RootType;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -57,7 +59,7 @@
     }
 
     doc.title("Object %08x", inst.getUniqueId());
-    doc.big(Value.render(inst));
+    doc.big(Value.render(mSnapshot, inst));
 
     printAllocationSite(doc, query, inst);
     printDominatorPath(doc, query, inst);
@@ -65,27 +67,41 @@
     doc.section("Object Info");
     ClassObj cls = inst.getClassObj();
     doc.descriptions();
-    doc.description(DocString.text("Class"), Value.render(cls));
+    doc.description(DocString.text("Class"), Value.render(mSnapshot, cls));
     doc.description(DocString.text("Size"), DocString.format("%d", inst.getSize()));
     doc.description(
         DocString.text("Retained Size"),
         DocString.format("%d", inst.getTotalRetainedSize()));
     doc.description(DocString.text("Heap"), DocString.text(inst.getHeap().getName()));
+
+    Collection<RootType> rootTypes = mSnapshot.getRootTypes(inst);
+    if (rootTypes != null) {
+      DocString types = new DocString();
+      String comma = "";
+      for (RootType type : rootTypes) {
+        types.append(comma);
+        types.append(type.getName());
+        comma = ", ";
+      }
+      doc.description(DocString.text("Root Types"), types);
+    }
+
     doc.end();
 
     printBitmap(doc, inst);
     if (inst instanceof ClassInstance) {
-      printClassInstanceFields(doc, query, (ClassInstance)inst);
+      printClassInstanceFields(doc, query, mSnapshot, (ClassInstance)inst);
     } else if (inst instanceof ArrayInstance) {
-      printArrayElements(doc, query, (ArrayInstance)inst);
+      printArrayElements(doc, query, mSnapshot, (ArrayInstance)inst);
     } else if (inst instanceof ClassObj) {
-      printClassInfo(doc, query, (ClassObj)inst);
+      printClassInfo(doc, query, mSnapshot, (ClassObj)inst);
     }
-    printReferences(doc, query, inst);
+    printReferences(doc, query, mSnapshot, inst);
     printDominatedObjects(doc, query, inst);
   }
 
-  private static void printClassInstanceFields(Doc doc, Query query, ClassInstance inst) {
+  private static void printClassInstanceFields(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassInstance inst) {
     doc.section("Fields");
     doc.table(new Column("Type"), new Column("Name"), new Column("Value"));
     SubsetSelector<ClassInstance.FieldValue> selector
@@ -94,31 +110,35 @@
       doc.row(
           DocString.text(field.getField().getType().toString()),
           DocString.text(field.getField().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printArrayElements(Doc doc, Query query, ArrayInstance array) {
+  private static void printArrayElements(
+      Doc doc, Query query, AhatSnapshot snapshot, ArrayInstance array) {
     doc.section("Array Elements");
     doc.table(new Column("Index", Column.Align.RIGHT), new Column("Value"));
     List<Object> elements = Arrays.asList(array.getValues());
     SubsetSelector<Object> selector = new SubsetSelector(query, ARRAY_ELEMENTS_ID, elements);
     int i = 0;
     for (Object elem : selector.selected()) {
-      doc.row(DocString.format("%d", i), Value.render(elem));
+      doc.row(DocString.format("%d", i), Value.render(snapshot, elem));
       i++;
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printClassInfo(Doc doc, Query query, ClassObj clsobj) {
+  private static void printClassInfo(
+      Doc doc, Query query, AhatSnapshot snapshot, ClassObj clsobj) {
     doc.section("Class Info");
     doc.descriptions();
-    doc.description(DocString.text("Super Class"), Value.render(clsobj.getSuperClassObj()));
-    doc.description(DocString.text("Class Loader"), Value.render(clsobj.getClassLoader()));
+    doc.description(DocString.text("Super Class"),
+        Value.render(snapshot, clsobj.getSuperClassObj()));
+    doc.description(DocString.text("Class Loader"),
+        Value.render(snapshot, clsobj.getClassLoader()));
     doc.end();
 
     doc.section("Static Fields");
@@ -131,13 +151,14 @@
       doc.row(
           DocString.text(field.getKey().getType().toString()),
           DocString.text(field.getKey().getName()),
-          Value.render(field.getValue()));
+          Value.render(snapshot, field.getValue()));
     }
     doc.end();
     selector.render(doc);
   }
 
-  private static void printReferences(Doc doc, Query query, Instance inst) {
+  private static void printReferences(
+      Doc doc, Query query, AhatSnapshot snapshot, Instance inst) {
     doc.section("Objects with References to this Object");
     if (inst.getHardReferences().isEmpty()) {
       doc.println(DocString.text("(none)"));
@@ -146,7 +167,7 @@
       List<Instance> references = inst.getHardReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, HARD_REFS_ID, references);
       for (Instance ref : selector.selected()) {
-        doc.row(Value.render(ref));
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
       selector.render(doc);
@@ -158,7 +179,7 @@
       List<Instance> references = inst.getSoftReferences();
       SubsetSelector<Instance> selector = new SubsetSelector(query, SOFT_REFS_ID, references);
       for (Instance ref : selector.selected()) {
-        doc.row(Value.render(ref));
+        doc.row(Value.render(snapshot, ref));
       }
       doc.end();
       selector.render(doc);
@@ -217,7 +238,7 @@
             if (element == null) {
               return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
             } else {
-              return DocString.text("→ ").append(Value.render(element));
+              return DocString.text("→ ").append(Value.render(mSnapshot, element));
             }
           }
         };
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 8ad3f48..4cfb0a5 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -60,7 +60,7 @@
       doc.row(
           DocString.format("%,d", inst.getSize()),
           DocString.text(inst.getHeap().getName()),
-          Value.render(inst));
+          Value.render(mSnapshot, inst));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index 0425a5a..839e220 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -101,7 +101,7 @@
                 site.getStackId(), site.getStackDepth(), info.heap.getName(), className),
             DocString.format("%,14d", info.numInstances)),
           DocString.text(info.heap.getName()),
-          Value.render(info.classObj));
+          Value.render(mSnapshot, info.classObj));
     }
     doc.end();
     selector.render(doc);
diff --git a/tools/ahat/src/Value.java b/tools/ahat/src/Value.java
index 7c969b3..847692b 100644
--- a/tools/ahat/src/Value.java
+++ b/tools/ahat/src/Value.java
@@ -32,21 +32,29 @@
   /**
    * Create a DocString representing a summary of the given instance.
    */
-  private static DocString renderInstance(Instance inst) {
-    DocString link = new DocString();
+  private static DocString renderInstance(AhatSnapshot snapshot, Instance inst) {
+    DocString formatted = new DocString();
     if (inst == null) {
-      link.append("(null)");
-      return link;
+      formatted.append("(null)");
+      return formatted;
     }
 
+    // Annotate roots as roots.
+    if (snapshot.isRoot(inst)) {
+      formatted.append("(root) ");
+    }
+
+
     // Annotate classes as classes.
+    DocString link = new DocString();
     if (inst instanceof ClassObj) {
       link.append("class ");
     }
 
     link.append(inst.toString());
+
     URI objTarget = DocString.formattedUri("object?id=%d", inst.getId());
-    DocString formatted = DocString.link(objTarget, link);
+    formatted.appendLink(objTarget, link);
 
     // Annotate Strings with their values.
     String stringValue = InstanceUtils.asString(inst, kMaxChars);
@@ -63,7 +71,7 @@
       // It should not be possible for a referent to refer back to the
       // reference object, even indirectly, so there shouldn't be any issues
       // with infinite recursion here.
-      formatted.append(renderInstance(referent));
+      formatted.append(renderInstance(snapshot, referent));
     }
 
     // Annotate DexCache with its location.
@@ -89,9 +97,9 @@
   /**
    * Create a DocString summarizing the given value.
    */
-  public static DocString render(Object val) {
+  public static DocString render(AhatSnapshot snapshot, Object val) {
     if (val instanceof Instance) {
-      return renderInstance((Instance)val);
+      return renderInstance(snapshot, (Instance)val);
     } else {
       return DocString.format("%s", val);
     }
diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh
index 47fc50f..c79f4b9 100755
--- a/tools/run-jdwp-tests.sh
+++ b/tools/run-jdwp-tests.sh
@@ -128,7 +128,7 @@
       --vm-arg -Djpda.settings.verbose=true \
       --vm-arg -Djpda.settings.syncPort=34016 \
       --vm-arg -Djpda.settings.transportAddress=127.0.0.1:55107 \
-      --vm-arg -Djpda.settings.debuggeeJavaPath="\"$art_debugee $image $debuggee_args\"" \
+      --vm-arg -Djpda.settings.debuggeeJavaPath="$art_debugee $image $debuggee_args" \
       --classpath $test_jar \
       --vm-arg -Xcompiler-option --vm-arg --debuggable \
       $test